├── demo └── __init__.py ├── src ├── news │ ├── src │ │ ├── tests │ │ │ ├── __init__.py │ │ │ └── test_mongo_db_client.py │ │ └── finance_news_scraper │ │ │ ├── __init__.py │ │ │ ├── news_sources.py │ │ │ ├── sentiment.py │ │ │ ├── __main__.py │ │ │ └── mongo_client.py │ ├── requirements.txt │ ├── setup.py │ ├── Dockerfile │ ├── README.md │ ├── pyproject.toml │ └── LICENSE └── stocks │ ├── src │ └── finance_stock_scraper │ │ ├── __init__.py │ │ ├── model │ │ ├── __init__.py │ │ ├── Intervals.py │ │ └── Ticker.py │ │ ├── ExecutionContext.py │ │ ├── YFDataProvider.py │ │ ├── TickerRepository.py │ │ ├── __main__.py │ │ ├── QuestClient.py │ │ └── workflow.py │ ├── requirements.txt │ ├── setup.py │ ├── Dockerfile │ ├── README.md │ ├── pyproject.toml │ └── LICENSE ├── .env ├── news └── rss-feeds.json ├── tickers ├── downlaod_nasdaq.py ├── download_eurex.py ├── download_Financial_Markets_UK.py ├── download_helper.py ├── EUREX.csv └── nasdaq.csv ├── .github └── workflows │ ├── publish_docker_test.yml │ ├── publish_on_docker_hub.yml │ ├── python-publish-test.yml │ └── publish_on_pipy.yml ├── LICENSE ├── README.md ├── .gitignore └── docker-compose.yml /demo/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/news/src/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/news/src/finance_news_scraper/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/stocks/src/finance_stock_scraper/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/stocks/src/finance_stock_scraper/model/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/stocks/requirements.txt: -------------------------------------------------------------------------------- 1 | pytz 2 | pandas 3 | yfinance 4 | questdb 5 | requests 6 | pandas-market-calendars 7 | tqdm -------------------------------------------------------------------------------- /src/stocks/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | import os 3 | 4 | setup(version=os.environ.get("PACKAGE_VERSION","DEBUG")) -------------------------------------------------------------------------------- /src/news/requirements.txt: -------------------------------------------------------------------------------- 1 | pytz 2 | pandas 3 | numpy 4 | requests 5 | tqdm 6 | pymongo 7 | transformers[torch] 8 | beautifulsoup4 9 | newspaper3k 10 | dateparser 11 | cchardet 12 | google-news-feed -------------------------------------------------------------------------------- /src/stocks/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10 2 | 3 | ADD requirements.txt /requirements.txt 4 | RUN pip install -r /requirements.txt 5 | 6 | RUN mkdir /var/lib/stock-scraper 7 | RUN mkdir /app 8 | ADD ./src /app 9 | WORKDIR /app 10 | CMD ["python","-m","finance_stock_scraper"] 
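The `setup.py` files read the package version from the `PACKAGE_VERSION` environment variable (the publish workflows further down export it before calling `python -m build`). A minimal local sketch of that same flow, assuming the `build` package is installed; the helper name and the dev version string are illustrative, not part of the repo:

```python
# Hypothetical local build helper mirroring what the publish workflows do:
# export PACKAGE_VERSION, then run `python -m build` inside the package directory.
import os
import subprocess

def build_package(package_dir: str, version: str) -> None:
    env = dict(os.environ, PACKAGE_VERSION=version)
    subprocess.run(["python", "-m", "build"], cwd=package_dir, env=env, check=True)

if __name__ == "__main__":
    build_package("./src/stocks", "0.0.0.dev0")  # illustrative version string
```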
-------------------------------------------------------------------------------- /src/news/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup,find_packages 2 | import os 3 | 4 | setup( 5 | version=os.environ.get("PACKAGE_VERSION","0.0.0"), 6 | package_dir={"":"src"}, 7 | packages=find_packages(where="./src", exclude=("*.tests", "*.tests.*", "tests.*", "tests")) 8 | ) -------------------------------------------------------------------------------- /.env: -------------------------------------------------------------------------------- 1 | INFLUX_DB_USER="admin" 2 | INFLUX_DB_PASSWORD="adminadmin" # PLEASE CHANGE ! 3 | INFLUX_DB_ADMIN_TOKEN="OYRyhNIDCQFe1WMJeJnljPV323EWA3GE45CA1Mpdx5TBbw-pxYqfGlFgAvdtrbKgZcJZnQn7oOhLoRbsUOhnuw==" # PLEASE CHANGE ! 4 | 5 | MONGO_DB_USER="admin" 6 | MONGO_DB_PASDWORD="asda2sdqw12e4asfd" # PLEASE CHANGE! -------------------------------------------------------------------------------- /news/rss-feeds.json: -------------------------------------------------------------------------------- 1 | { 2 | "SeekingAlpha":"https://seekingalpha.com/market_currents.xml", 3 | "CNBC":"http://www.cnbc.com/id/19746125/device/rss/rss.xml", 4 | "Fortune":"https://fortune.com/feed", 5 | "FinancialTimes":"https://www.ft.com/?format=rss", 6 | "Investing.com":"https://www.investing.com/rss/news.rss", 7 | "YahooNews":"https://finance.yahoo.com/news/rssindex" 8 | } -------------------------------------------------------------------------------- /src/news/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10 2 | 3 | ADD requirements.txt /requirements.txt 4 | RUN pip install -r /requirements.txt 5 | 6 | RUN mkdir /var/lib/news-scraper 7 | RUN mkdir /var/lib/news-scraper/tickers 8 | RUN mkdir /var/lib/news-scraper/model 9 | RUN mkdir /var/lib/news-scraper/rss 10 | 11 | RUN mkdir /app 12 | ADD ./src /app 13 | WORKDIR /app 14 | CMD ["python","-m","finance_news_scraper"] 15 | 16 | -------------------------------------------------------------------------------- /tickers/downlaod_nasdaq.py: -------------------------------------------------------------------------------- 1 | from yahoo_fin import stock_info as si 2 | import pandas as pd 3 | from download_helper import get_company_names 4 | 5 | tickers = [] 6 | 7 | tickers += si.tickers_sp500() 8 | tickers += si.tickers_dow() 9 | tickers = list(set(tickers)) 10 | 11 | short_company_names,long_company_names = get_company_names(tickers) 12 | df = pd.DataFrame({'tickers':tickers,'shortNames':short_company_names,'longNames':long_company_names}) 13 | df.to_csv("./tickers/NASDAQ.csv",index=False,header=True) -------------------------------------------------------------------------------- /tickers/download_eurex.py: -------------------------------------------------------------------------------- 1 | from yahoo_fin import stock_info as si 2 | import pandas as pd 3 | from download_helper import get_company_names 4 | 5 | tickers = [] 6 | tickers += list(pd.read_html("https://en.wikipedia.org/wiki/DAX")[3]["Ticker symbol"]) 7 | tickers = list(set(tickers)) 8 | 9 | short_company_names,long_company_names = get_company_names(tickers) 10 | df = pd.DataFrame({'tickers':tickers,'shortNames':short_company_names,'longNames':long_company_names}) 11 | df.to_csv("./tickers/EUREX.csv",index=False,header=True) -------------------------------------------------------------------------------- /tickers/download_Financial_Markets_UK.py: 
-------------------------------------------------------------------------------- 1 | from yahoo_fin import stock_info as si 2 | import pandas as pd 3 | from download_helper import get_company_names 4 | 5 | tickers = [] 6 | 7 | tickers += si.tickers_ftse100() 8 | tickers += si.tickers_ftse250() 9 | tickers = list(set(tickers)) 10 | 11 | short_company_names,long_company_names = get_company_names(tickers) 12 | df = pd.DataFrame({'tickers':tickers,'shortNames':short_company_names,'longNames':long_company_names}) 13 | df.to_csv("./tickers/Financial_Markets_UK.csv",index=False,header=True) -------------------------------------------------------------------------------- /src/stocks/src/finance_stock_scraper/ExecutionContext.py: -------------------------------------------------------------------------------- 1 | from finance_stock_scraper.TickerRepository import TickerRepository 2 | from finance_stock_scraper.YFDataProvider import YFDataProvider 3 | from finance_stock_scraper.QuestClient import QuestClient 4 | 5 | class ExecutionContext(object): 6 | def __init__(self,tickerRepository:TickerRepository,yfDataProcider:YFDataProvider,questClient:QuestClient) -> None: 7 | self.tickerRepository = tickerRepository 8 | self.yfDataProcider = yfDataProcider 9 | self.questClient = questClient -------------------------------------------------------------------------------- /src/stocks/README.md: -------------------------------------------------------------------------------- 1 | # finance-stock-scraper 2 | Periodically collects stock prices from the [Yahoo-Finance-API](https://finance.yahoo.com/) after each market close and stores them in a [QuestDB](https://questdb.io/). 3 | 4 | ## Usage 5 | Install the module with `pip install finance-stock-scraper` and run the script with `python -m finance_stock_scraper`, or use the [docker-image](https://hub.docker.com/repository/docker/llukas22/finance-stock-scraper). 6 | ___ 7 | For an example usage and configuration see [here](https://github.com/LLukas22/Finance-Data-Scraper). -------------------------------------------------------------------------------- /src/news/README.md: -------------------------------------------------------------------------------- 1 | # finance-news-scraper 2 | Periodically collects articles from [Google-News](https://news.google.com/topstories), [FinViz](https://finviz.com/) or RSS feeds and stores them in a [MongoDB](https://www.mongodb.com/). 3 | 4 | ## Usage 5 | Install the module with `pip install finance-news-scraper` and run the script with `python -m finance_news_scraper`, or use the [docker-image](https://hub.docker.com/repository/docker/llukas22/finance-news-scraper). 6 | ___ 7 | For an example usage and configuration see [here](https://github.com/LLukas22/Finance-Data-Scraper).
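Outside of Docker, the scraper is configured through the same environment variables documented in the `docker-compose.yml` at the repository root. A minimal sketch of a one-off local run, assuming the package is installed and a MongoDB instance is reachable; the values shown are illustrative:

```python
# Hypothetical one-off run: set the documented environment variables, then
# invoke the module exactly as the Dockerfile does (`python -m finance_news_scraper`).
import os
import subprocess

env = dict(
    os.environ,
    NEWSSCRAPER_MODE="Single",               # run once instead of on a schedule
    NEWSSCRAPER_MONGODB_HOST="localhost",
    NEWSSCRAPER_MONGODB_PORT="27017",
    NEWSSCRAPER_DOWNLOAD_GOOGLE_NEWS="true",
    NEWSSCRAPER_SENTIMENT_ANALYSIS="false",  # skip the GPU-heavy sentiment step
    # NEWSSCRAPER_MONGODB_USER / NEWSSCRAPER_MONGODB_PASSWORD may also be
    # required, depending on how your MongoDB instance is secured.
)
subprocess.run(["python", "-m", "finance_news_scraper"], env=env, check=True)
```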
-------------------------------------------------------------------------------- /src/stocks/src/finance_stock_scraper/model/Intervals.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | class IntervalTypes(Enum): 4 | Intraday = 0 5 | Daily = 1 6 | 7 | INTERVALS = { 8 | "1m" : IntervalTypes.Intraday, 9 | "2m" : IntervalTypes.Intraday, 10 | "5m" : IntervalTypes.Intraday, 11 | "15m": IntervalTypes.Intraday, 12 | "30m": IntervalTypes.Intraday, 13 | "60m": IntervalTypes.Intraday, 14 | "90m": IntervalTypes.Intraday, 15 | "1h" : IntervalTypes.Intraday, 16 | "1d" : IntervalTypes.Daily, 17 | "5d" : IntervalTypes.Daily, 18 | "1wk": IntervalTypes.Daily, 19 | "1mo": IntervalTypes.Daily, 20 | "3mo": IntervalTypes.Daily, 21 | } -------------------------------------------------------------------------------- /.github/workflows/publish_docker_test.yml: -------------------------------------------------------------------------------- 1 | name: Test Publish to Docker-Hub 2 | 3 | on: workflow_dispatch 4 | 5 | jobs: 6 | docker: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - name: checkout 10 | uses: actions/checkout@v3 11 | 12 | - name: Login to DockerHub 13 | uses: docker/login-action@v2 14 | with: 15 | username: ${{ secrets.DOCKERHUB_USERNAME }} 16 | password: ${{ secrets.DOCKERHUB_TOKEN }} 17 | 18 | - name: Build and push news scraper 19 | uses: docker/build-push-action@v3 20 | with: 21 | context: ./src/news 22 | file: ./src/news/Dockerfile 23 | builder: ${{ steps.buildx.outputs.name }} 24 | push: true 25 | tags: ${{ secrets.DOCKERHUB_USERNAME }}/finance-data-scraper:latest 26 | -------------------------------------------------------------------------------- /tickers/download_helper.py: -------------------------------------------------------------------------------- 1 | from yahooquery import Ticker 2 | from tqdm import tqdm 3 | 4 | def get_company_names(tickers:list[str])->tuple[list[str],list[str]]: 5 | short_company_names = [] 6 | long_company_names = [] 7 | infos = Ticker(tickers).quote_type 8 | for ticker in tqdm(tickers): 9 | info = infos[ticker] 10 | if info: 11 | if "shortName" in info: 12 | short_company_names.append(info["shortName"]) 13 | else: 14 | short_company_names.append("") 15 | 16 | if "longName" in info: 17 | long_company_names.append(info["longName"]) 18 | else: 19 | long_company_names.append("") 20 | else: 21 | short_company_names.append("") 22 | long_company_names.append("") 23 | return short_company_names,long_company_names -------------------------------------------------------------------------------- /src/stocks/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools", "setuptools-scm"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "finance_stock_scraper" 7 | dynamic = ["version"] 8 | authors = [ 9 | { name="Lukas Kreussel"}, 10 | ] 11 | description = "Collect, store and access stock exchange data locally" 12 | readme = "README.md" 13 | license = { file="LICENSE" } 14 | requires-python = ">=3.10" 15 | classifiers = [ 16 | "Programming Language :: Python :: 3", 17 | "License :: OSI Approved :: MIT License", 18 | "Operating System :: OS Independent", 19 | ] 20 | 21 | dependencies=[ 22 | "pytz>=2022.1", 23 | "pandas>=1.4.2", 24 | "yfinance>=0.1.70", 25 | "questdb>=1.0.0", 26 | "requests>=2.28.1", 27 | "pandas-market-calendars>=3.4", 28 | "tqdm>=4.64.0" 29 | ] 30 | 31 | [project.urls] 32 | "Homepage" = 
"https://github.com/LLukas22/Finance-Data-Scraper" 33 | "Bug Tracker" = "https://github.com/LLukas22/Finance-Data-Scraper/issues" -------------------------------------------------------------------------------- /src/stocks/src/finance_stock_scraper/YFDataProvider.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import pandas as pd 3 | import yfinance as yf 4 | from yfinance import shared 5 | 6 | class YFDataProvider(object): 7 | def __init__(self) -> None: 8 | pass 9 | 10 | def get_data(self,tickers:list[str],start_date:datetime.datetime,end_date:datetime.datetime,interval:str)->tuple[pd.DataFrame,dict]: 11 | if len(tickers) < 1: 12 | return None 13 | 14 | ticker_string = " ".join(tickers) 15 | data = yf.download(ticker_string, start_date, end_date, interval=interval, threads=True, group_by = 'ticker', progress=True) 16 | return data,shared._ERRORS 17 | 18 | def get_data_from_period(self,tickers:list[str],interval:str,period:str="max")->tuple[pd.DataFrame,dict]: 19 | if len(tickers) < 1: 20 | return None 21 | 22 | ticker_string = " ".join(tickers) 23 | data = yf.download(ticker_string, period = period, interval=interval, threads=True, group_by = 'ticker', progress=True) 24 | return data,shared._ERRORS -------------------------------------------------------------------------------- /src/news/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools", "setuptools-scm"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "finance_news_scraper" 7 | dynamic = ["version"] 8 | authors = [ 9 | { name="Lukas Kreussel"}, 10 | ] 11 | description = "Collect, store and access finance news locally" 12 | readme = "README.md" 13 | license = { file="LICENSE" } 14 | requires-python = ">=3.10" 15 | classifiers = [ 16 | "Programming Language :: Python :: 3", 17 | "License :: OSI Approved :: MIT License", 18 | "Operating System :: OS Independent", 19 | ] 20 | 21 | dependencies=[ 22 | "pytz>=2022.1", 23 | "pandas>=1.4.2", 24 | "numpy>=1.22.3", 25 | "requests>=2.28.1", 26 | "tqdm>=4.64.0", 27 | "pymongo>=4.2.0", 28 | "transformers[torch]>=4.19.4", 29 | "beautifulsoup4>=4.11.1", 30 | "newspaper3k>=0.2.8", 31 | "dateparser>=1.1.1", 32 | "cchardet>=2.1.7", 33 | "google-news-feed>=1.0.0" 34 | ] 35 | 36 | [project.urls] 37 | "Homepage" = "https://github.com/LLukas22/Finance-Data-Scraper" 38 | "Bug Tracker" = "https://github.com/LLukas22/Finance-Data-Scraper/issues" -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 LLukas22 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/news/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 LLukas22 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/stocks/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 LLukas22 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /src/stocks/src/finance_stock_scraper/model/Ticker.py: -------------------------------------------------------------------------------- 1 | import pandas_market_calendars as mcal 2 | from pandas_market_calendars import MarketCalendar 3 | import pandas as pd 4 | import datetime 5 | 6 | class Ticker(object): 7 | trading_times:MarketCalendar 8 | 9 | def __init__(self,ticker:str,exchange:str): 10 | self.ticker = ticker.upper() 11 | self.exchange = exchange.upper() 12 | self.trading_times = None 13 | 14 | def _init_calendar(self)->None: 15 | self.trading_times = mcal.get_calendar(self.exchange) 16 | 17 | def get_trading_times(self,start_date:datetime.date,end_date:datetime.date)->pd.DataFrame: 18 | if self.trading_times is None: 19 | self._init_calendar() 20 | return self.trading_times.schedule(start_date=start_date, end_date=end_date) 21 | 22 | def is_in_trading_times(self,date:datetime.date)->bool: 23 | schedule = self.get_trading_times(date,date) 24 | if len(schedule) == 0: 25 | return False 26 | return date == schedule.index[0].date() 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /.github/workflows/publish_on_docker_hub.yml: -------------------------------------------------------------------------------- 1 | name: Publish to Docker-Hub 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | jobs: 8 | docker: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - name: checkout 12 | uses: actions/checkout@v3 13 | 14 | - id: get_version 15 | uses: battila7/get-version-action@v2 16 | 17 | - name: print version 18 | run: echo ${{ steps.get_version.outputs.version-without-v }} 19 | 20 | - name: Login to DockerHub 21 | uses: docker/login-action@v2 22 | with: 23 | username: ${{ secrets.DOCKERHUB_USERNAME }} 24 | password: ${{ secrets.DOCKERHUB_TOKEN }} 25 | 26 | - name: Build and push news scraper 27 | uses: docker/build-push-action@v3 28 | with: 29 | context: ./src/news 30 | file: ./src/news/Dockerfile 31 | builder: ${{ steps.buildx.outputs.name }} 32 | push: true 33 | tags: ${{ secrets.DOCKERHUB_USERNAME }}/finance-news-scraper:${{ steps.get_version.outputs.version-without-v }} 34 | 35 | - name: Build and push stock scraper 36 | uses: docker/build-push-action@v3 37 | with: 38 | context: ./src/stocks 39 | file: ./src/stocks/Dockerfile 40 | builder: ${{ steps.buildx.outputs.name }} 41 | push: true 42 | tags: ${{ secrets.DOCKERHUB_USERNAME }}/finance-stock-scraper:${{ steps.get_version.outputs.version-without-v }} 43 | -------------------------------------------------------------------------------- /.github/workflows/python-publish-test.yml: -------------------------------------------------------------------------------- 1 | # This workflow will upload a Python Package using Twine when a release is created 2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries 3 | 4 | # This workflow uses actions that are not certified by GitHub. 5 | # They are provided by a third-party and are governed by 6 | # separate terms of service, privacy policy, and support 7 | # documentation. 
8 | 9 | name: Upload Python Package to Test PyPi 10 | 11 | on: workflow_dispatch 12 | 13 | env: 14 | GLOBAL_PACKAGE_VERSION: 0.0.2 15 | 16 | permissions: 17 | contents: read 18 | 19 | jobs: 20 | deploy: 21 | 22 | runs-on: ubuntu-latest 23 | 24 | steps: 25 | - name: update env 26 | run: echo "PACKAGE_VERSION=$GLOBAL_PACKAGE_VERSION" >> $GITHUB_ENV 27 | 28 | - name: checkout 29 | uses: actions/checkout@v3 30 | 31 | - name: Set up Python 32 | uses: actions/setup-python@v3 33 | with: 34 | python-version: '3.10' 35 | - name: Install dependencies 36 | run: | 37 | python -m pip install --upgrade pip 38 | pip install build 39 | 40 | - name: Build news_scraper 41 | working-directory: ./src/news 42 | run: python -m build 43 | - name: Publish news_scraper 44 | uses: pypa/gh-action-pypi-publish@v1.5.1 45 | with: 46 | user: __token__ 47 | password: ${{ secrets.PYPI_TEST_TOKEN }} 48 | repository_url: https://test.pypi.org/legacy/ 49 | packages_dir: ./src/news/dist/ 50 | 51 | - name: Build stock_scraper 52 | working-directory: ./src/stocks 53 | run: python -m build 54 | - name: Publish stock_scraper 55 | uses: pypa/gh-action-pypi-publish@v1.5.1 56 | with: 57 | user: __token__ 58 | password: ${{ secrets.PYPI_TEST_TOKEN }} 59 | repository_url: https://test.pypi.org/legacy/ 60 | packages_dir: ./src/stocks/dist/ 61 | -------------------------------------------------------------------------------- /tickers/EUREX.csv: -------------------------------------------------------------------------------- 1 | tickers,shortNames,longNames 2 | BAS.DE,BASF SE,BASF SE 3 | BNR.DE,BRENNTAG SE NA O.N.,Brenntag SE 4 | DTG.DE,DAIMLER TRUCK HOLD,Daimler Truck Holding AG 5 | 1COV.DE,COVESTRO AG,Covestro AG 6 | PAH3.DE,PORSCHE AUTOM.HLDG VZO,Porsche Automobil Holding SE 7 | HNR1.DE,HANNOVER RUECK SE NA O.N.,Hannover Rück SE 8 | DBK.DE,DEUTSCHE BANK AG,Deutsche Bank Aktiengesellschaft 9 | QIA.DE,"QIAGEN NV EO -,01",Qiagen N.V. 10 | ZAL.DE,ZALANDO SE,Zalando SE 11 | DPW.DE,DEUTSCHE POST AG,Deutsche Post AG 12 | VOW3.DE,VOLKSWAGEN AG,Volkswagen AG 13 | MBG.DE,MERCEDES-BENZ GROUP,Mercedes-Benz Group AG 14 | SIE.DE,SIEMENS AG,Siemens Aktiengesellschaft 15 | ADS.DE,ADIDAS AG,adidas AG 16 | EOAN.DE,E.ON SE,E.ON SE 17 | MRK.DE,MERCK KGAA,MERCK Kommanditgesellschaft auf Aktien 18 | SHL.DE,SIEMENS HEALTH.AG NA O.N.,Siemens Healthineers AG 19 | ALV.DE,ALLIANZ SE,Allianz SE 20 | DTE.DE,DEUTSCHE TELEKOM AG,Deutsche Telekom AG 21 | CON.DE,CONTINENTAL AG,Continental Aktiengesellschaft 22 | HFG.DE,HELLOFRESH SE INH O.N.,HelloFresh SE 23 | FRE.DE,FRESENIUS SE&CO KGAA,Fresenius SE & Co. KGaA 24 | IFX.DE,INFINEON TECHNOLOGIES AG,Infineon Technologies AG 25 | LIN.DE,"LINDE PLC EO 0,001",Linde plc 26 | FME.DE,FRESENIUS MEDICAL CARE AG & CO ,Fresenius Medical Care AG & Co. KGaA 27 | MUV2.DE,MUENCHENER RUECKVERSICHERUNGS A,Münchener Rückversicherungs-Gesellschaft Aktiengesellschaft in München 28 | SY1.DE,SYMRISE AG INH. O.N.,Symrise AG 29 | RWE.DE,RWE AG INH O.N.,RWE Aktiengesellschaft 30 | SAP.DE,SAP SE,SAP SE 31 | AIR.DE,AIRBUS SE,Airbus SE 32 | HEI.DE,HEIDELBERGCEMENT AG O.N.,HeidelbergCement AG 33 | MTX.DE,MTU AERO ENGINES NA O.N.,MTU Aero Engines AG 34 | PUM.DE,PUMA SE,PUMA SE 35 | BEI.DE,BEIERSDORF AG O.N.,Beiersdorf Aktiengesellschaft 36 | SRT3.DE,SARTORIUS AG VZO O.N.,Sartorius Aktiengesellschaft 37 | DB1.DE,DEUTSCHE BOERSE NA O.N.,Deutsche Börse AG 38 | BAYN.DE,BAYER AG,Bayer Aktiengesellschaft 39 | VNA.DE,VONOVIA SE NA O.N.,Vonovia SE 40 | HEN3.DE,HENKEL AG&CO. KGAA,Henkel AG & Co. 
KGaA 41 | BMW.DE,BAYERISCHE MOTOREN WERKE AG,Bayerische Motoren Werke Aktiengesellschaft 42 | -------------------------------------------------------------------------------- /.github/workflows/publish_on_pipy.yml: -------------------------------------------------------------------------------- 1 | # This workflow will upload a Python Package using Twine when a release is created 2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries 3 | 4 | # This workflow uses actions that are not certified by GitHub. 5 | # They are provided by a third-party and are governed by 6 | # separate terms of service, privacy policy, and support 7 | # documentation. 8 | 9 | name: Upload Python Package to PyPi 10 | 11 | on: 12 | release: 13 | types: [published] 14 | 15 | permissions: 16 | contents: read 17 | 18 | jobs: 19 | deploy: 20 | 21 | runs-on: ubuntu-latest 22 | 23 | steps: 24 | - name: checkout 25 | uses: actions/checkout@v3 26 | 27 | - id: get_version 28 | uses: battila7/get-version-action@v2 29 | 30 | - name: print version 31 | run: echo ${{ steps.get_version.outputs.version-without-v }} 32 | 33 | - name: update env 34 | run: echo "PACKAGE_VERSION=${{ steps.get_version.outputs.version-without-v }}" >> $GITHUB_ENV 35 | 36 | - name: print package version 37 | run: echo $PACKAGE_VERSION 38 | 39 | - name: Set up Python 40 | uses: actions/setup-python@v3 41 | with: 42 | python-version: '3.10' 43 | - name: Install dependencies 44 | run: | 45 | python -m pip install --upgrade pip 46 | pip install build 47 | 48 | - name: Build news_scraper 49 | working-directory: ./src/news 50 | run: python -m build 51 | - name: Publish news_scraper 52 | uses: pypa/gh-action-pypi-publish@v1.5.1 53 | with: 54 | user: __token__ 55 | password: ${{ secrets.PYPI_TOKEN }} 56 | packages_dir: ./src/news/dist/ 57 | 58 | - name: Build stock_scraper 59 | working-directory: ./src/stocks 60 | run: python -m build 61 | - name: Publish stock_scraper 62 | uses: pypa/gh-action-pypi-publish@v1.5.1 63 | with: 64 | user: __token__ 65 | password: ${{ secrets.PYPI_TOKEN }} 66 | packages_dir: ./src/stocks/dist/ 67 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Finance-Data-Scraper 2 | Packages to collect Stock and News data periodically from different sources and save it to databases. 3 | 4 | 5 | Also includes python packages to easily access and consume the data. 6 | 7 | ## Finance-Stock-Scraper 8 | Collects data from the [Yahoo-Finance API](https://finance.yahoo.com/) and saves it to a [QuestDB](https://questdb.io/) instance. 9 | 10 | ### Server 11 | Use the [docker-image](https://hub.docker.com/repository/docker/llukas22/finance-stock-scraper) to run the server. The server will collect data after each trading day. The tickers are provided via *.csv files that contain the ticker, name and long-name of the company. The files must be named after the exchange the tickers are listed on. An example can be found [here](tickers/NASDAQ.csv). 12 | 13 | An example config can be found in the [docker-compose.yml](docker-compose.yml) file. 14 | 15 | 16 | ### Consume the Data 17 | To consume the data, use the [pip package](https://pypi.org/project/finance-stock-scraper/). 18 | 19 | ``` 20 | pip install finance-stock-scraper 21 | ``` 22 | 23 | Then use the TickerRepository to get the data as a pandas DataFrame. 
24 | 25 | ```python 26 | from finance_stock_scraper.QuestClient import QuestClient 27 | from finance_stock_scraper.TickerRepository import TickerRepository 28 | from finance_stock_scraper.model.Ticker import Ticker 29 | 30 | questClient = QuestClient(host=IP) 31 | tickerRepository = TickerRepository(questClient) 32 | 33 | #Build a ticker object 34 | ticker = Ticker("GOOGL","NASDAQ") 35 | #Get data from QuestDB 36 | df_daily = tickerRepository.get_values(tickers=ticker,interval="1d",values=["open","close","high","low","volume"]) 37 | df_minutely = tickerRepository.get_values(tickers=ticker,interval="5m",values=["open","close","high","low","volume"],start_time=START_TIME,end_time=END_TIME) 38 | ``` 39 | 40 | ## Finance-News-Scraper 41 | Scrapes news articles and saves them to a [MongoDB](https://www.mongodb.com/) instance. 42 | ### Server 43 | Use the [docker-image](https://hub.docker.com/repository/docker/llukas22/finance-news-scraper) to run the server. The server will collect articles from [Google-News](https://news.google.com/topstories), [FinViz](https://finviz.com/) or RSS feeds. A sentiment analysis is then performed and the articles are saved to the database. 44 | 45 | 46 | An example config can be found in the [docker-compose.yml](docker-compose.yml) file. 47 | ### Consume the Data 48 | To consume the data, use the [pip package](https://pypi.org/project/finance-news-scraper/). 49 | 50 | ``` 51 | pip install finance-news-scraper 52 | ``` 53 | Then use the MongoDBClient to get the data as a pandas DataFrame. 54 | 55 | ```python 56 | from finance_news_scraper.mongo_client import MongoDBClient 57 | 58 | mongoClient = MongoDBClient(host=IP) 59 | 60 | articles = mongoClient.get_articles(["GOOGL"]) 61 | sentiments = mongoClient.get_sentiments("GOOGL",frequency="h",start=START_TIME,end=END_TIME) 62 | ``` 63 | 64 | ## Examples 65 | For more examples, see the [demo](demo/demo.ipynb) notebook.
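As a quick illustration, the two snippets above can also be combined. A minimal sketch, assuming both scrapers have been running for a while and that `IP`, `START_TIME` and `END_TIME` are defined as in the snippets above; the exact sentiment column names and timestamp granularity depend on the stored schema, so a resample may be needed before joining:

```python
from finance_stock_scraper.QuestClient import QuestClient
from finance_stock_scraper.TickerRepository import TickerRepository
from finance_stock_scraper.model.Ticker import Ticker
from finance_news_scraper.mongo_client import MongoDBClient

# Daily closing prices from QuestDB ...
prices = TickerRepository(QuestClient(host=IP)).get_values(
    tickers=Ticker("GOOGL", "NASDAQ"), interval="1d", values=["close"]
)
# ... and hourly sentiment scores from MongoDB for the same ticker
sentiments = MongoDBClient(host=IP).get_sentiments(
    "GOOGL", frequency="h", start=START_TIME, end=END_TIME
)

# Align both frames on their timestamp index
combined = prices.join(sentiments, how="inner", rsuffix="_sentiment")
print(combined.head())
```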
-------------------------------------------------------------------------------- /src/news/src/finance_news_scraper/news_sources.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import hashlib 3 | from bs4 import BeautifulSoup 4 | import requests 5 | from dateutil.parser import parse 6 | import pytz 7 | from google_news_feed import GoogleNewsFeed 8 | from datetime import datetime, timedelta 9 | from tqdm import tqdm 10 | import time 11 | import random 12 | import urllib.parse 13 | 14 | USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:103.0) Gecko/20100101 Firefox/103.0' 15 | 16 | class News_Item(object): 17 | def __init__(self,publisher:str,link:str,tickers:list[str],pub_date:datetime) -> None: 18 | self.publisher = publisher.upper() 19 | self.link = link 20 | self.pub_date = pub_date 21 | self.tickers = tickers 22 | self.__hash = None 23 | 24 | @property 25 | def hash(self)->str: 26 | if self.__hash: 27 | return self.__hash 28 | else: 29 | self.__hash = hashlib.sha512((self.link).encode("UTF-8")).hexdigest() 30 | return self.__hash 31 | 32 | def get_rss_items(rss_url:str,publisher:str) -> list[News_Item]: 33 | news_items = [] 34 | r = requests.get(rss_url) 35 | webpage = r.content 36 | soup = BeautifulSoup(webpage, features='xml') 37 | items = soup.find_all('item') 38 | for item in items: 39 | link = item.find('link').text 40 | dt = parse(item.find('pubDate').text).astimezone(pytz.UTC) 41 | #Some sites give us the tickers of the article 42 | tickers = None 43 | if item.find('category'): 44 | categories = item.find_all('category') 45 | tickers = [category.text for category in categories] 46 | news_items.append(News_Item(publisher,link,tickers,dt)) 47 | return news_items 48 | 49 | def get_google_news_items(tickers:list[tuple[str,str,str]]) -> list[News_Item]: 50 | news_items = [] 51 | gnf = GoogleNewsFeed() 52 | for ticker,shortname,longname in tqdm(tickers,desc="Google News"): 53 | try: 54 | time.sleep(random.uniform(0.5,1.0)) # try to avoid being rate limited 55 | results = gnf.query(f"{ticker} OR {shortname} OR {longname}",when="2w") 56 | for result in results: 57 | news_items.append(News_Item(result.source,result.link,[ticker],result.pubDate)) 58 | except Exception as e: 59 | logging.error(e) 60 | return news_items 61 | 62 | def get_finviz_news_items(tickers:list[tuple[str,str,str]]) -> list[News_Item]: 63 | news_items = [] 64 | finwiz_url = 'https://finviz.com/quote.ashx?t=' 65 | for ticker,shortname,longname in tqdm(tickers,desc="FinViz News"): 66 | try: 67 | time.sleep(random.uniform(0.5,1.0)) # try to avoid being rate limited 68 | url = finwiz_url + ticker.lower() 69 | result = requests.get(url,headers = {'User-Agent': USER_AGENT}) 70 | if result.status_code == 200: 71 | html = BeautifulSoup(result.content, features='html.parser') 72 | news_table = html.find(id='news-table') 73 | 74 | for x in news_table.findAll('tr'): 75 | link = x.a.attrs['href'] 76 | publisher = x.span.get_text().strip() 77 | 78 | date_scrape = x.td.text.split() 79 | datetime = parse(" ".join(date_scrape)).astimezone(pytz.UTC) 80 | news_items.append(News_Item(publisher,link,[ticker],datetime)) 81 | except Exception as e: 82 | logging.error(e) 83 | return news_items 84 | 85 | 86 | 87 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /storage 2 | /.vscode 3 | /worker 4 | /sentiment_model 5 | 6 | # Byte-compiled / 
optimized / DLL files 7 | __pycache__/ 8 | *.py[cod] 9 | *$py.class 10 | 11 | # C extensions 12 | *.so 13 | 14 | # Distribution / packaging 15 | .Python 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | downloads/ 20 | eggs/ 21 | .eggs/ 22 | lib/ 23 | lib64/ 24 | parts/ 25 | sdist/ 26 | var/ 27 | wheels/ 28 | share/python-wheels/ 29 | *.egg-info/ 30 | .installed.cfg 31 | *.egg 32 | MANIFEST 33 | 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .nox/ 48 | .coverage 49 | .coverage.* 50 | .cache 51 | nosetests.xml 52 | coverage.xml 53 | *.cover 54 | *.py,cover 55 | .hypothesis/ 56 | .pytest_cache/ 57 | cover/ 58 | 59 | # Translations 60 | *.mo 61 | *.pot 62 | 63 | # Django stuff: 64 | *.log 65 | local_settings.py 66 | db.sqlite3 67 | db.sqlite3-journal 68 | 69 | # Flask stuff: 70 | instance/ 71 | .webassets-cache 72 | 73 | # Scrapy stuff: 74 | .scrapy 75 | 76 | # Sphinx documentation 77 | docs/_build/ 78 | 79 | # PyBuilder 80 | .pybuilder/ 81 | target/ 82 | 83 | # Jupyter Notebook 84 | .ipynb_checkpoints 85 | 86 | # IPython 87 | profile_default/ 88 | ipython_config.py 89 | 90 | # pyenv 91 | # For a library or package, you might want to ignore these files since the code is 92 | # intended to run in multiple environments; otherwise, check them in: 93 | # .python-version 94 | 95 | # pipenv 96 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 97 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 98 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 99 | # install all needed dependencies. 100 | #Pipfile.lock 101 | 102 | # poetry 103 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 104 | # This is especially recommended for binary packages to ensure reproducibility, and is more 105 | # commonly ignored for libraries. 106 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 107 | #poetry.lock 108 | 109 | # pdm 110 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 111 | #pdm.lock 112 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 113 | # in version control. 114 | # https://pdm.fming.dev/#use-with-ide 115 | .pdm.toml 116 | 117 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 118 | __pypackages__/ 119 | 120 | # Celery stuff 121 | celerybeat-schedule 122 | celerybeat.pid 123 | 124 | # SageMath parsed files 125 | *.sage.py 126 | 127 | # Environments 128 | .venv 129 | env/ 130 | venv/ 131 | ENV/ 132 | env.bak/ 133 | venv.bak/ 134 | 135 | # Spyder project settings 136 | .spyderproject 137 | .spyproject 138 | 139 | # Rope project settings 140 | .ropeproject 141 | 142 | # mkdocs documentation 143 | /site 144 | 145 | # mypy 146 | .mypy_cache/ 147 | .dmypy.json 148 | dmypy.json 149 | 150 | # Pyre type checker 151 | .pyre/ 152 | 153 | # pytype static type analyzer 154 | .pytype/ 155 | 156 | # Cython debug symbols 157 | cython_debug/ 158 | 159 | # PyCharm 160 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 161 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 162 | # and can be added to the global gitignore or merged into this file. For a more nuclear 163 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 164 | #.idea/ 165 | -------------------------------------------------------------------------------- /src/stocks/src/finance_stock_scraper/TickerRepository.py: -------------------------------------------------------------------------------- 1 | from finance_stock_scraper.QuestClient import QuestClient 2 | from finance_stock_scraper.model.Ticker import Ticker 3 | import os 4 | import pandas as pd 5 | from datetime import datetime 6 | import logging 7 | 8 | class TickerRepository(object): 9 | exchanges:dict[str,dict[str,Ticker]] 10 | 11 | def __init__(self,quest_client:QuestClient) -> None: 12 | self.exchanges = {} 13 | self.quest_client = quest_client 14 | 15 | def load_tickers(self,directory:str)->None: 16 | """ 17 | Load tickers from *.csv files in a directory where the filename is the exchange and the tickers are the rows in the file. 
18 | """ 19 | files = [file for file in os.listdir(directory) if file.endswith(".csv")] 20 | if len(files) == 0: 21 | logging.error("Found no *.csv files in the Tickers Directory!") 22 | 23 | logging.debug(f"Found files: {','.join(files)}") 24 | 25 | for file in files: 26 | file = os.path.join(directory,file) 27 | exchange = os.path.basename(file).split('.')[0].upper() 28 | tickers = pd.read_csv(file) 29 | for ticker in tickers.values: 30 | if ticker[0] is not None and isinstance(ticker[0],str): 31 | self.add_ticker(Ticker(ticker[0],exchange)) 32 | 33 | def add_ticker(self,ticker:Ticker)->None: 34 | if ticker.exchange not in self.exchanges: 35 | self.exchanges[ticker.exchange] = {} 36 | self.exchanges[ticker.exchange][ticker.ticker] = ticker 37 | 38 | 39 | def get_ticker(self,ticker:str)->Ticker|None: 40 | found_ticker = None 41 | ticker = ticker.upper() 42 | for exchage in self.exchanges: 43 | if ticker in self.exchanges[exchage]: 44 | found_ticker = self.exchanges[exchage][ticker] 45 | break 46 | return found_ticker 47 | 48 | 49 | def _get_single_value(self,ticker:Ticker,interval:str,values:list[str]=["close"],start_time:datetime|None=None,end_time:datetime|None=None)->pd.DataFrame|None: 50 | df = None 51 | result = self.quest_client.get_data(ticker,interval,values,start_time,end_time) 52 | if result: 53 | data = {} 54 | for i,column in enumerate(values): 55 | data[column] = [entry[i+1] for entry in result['dataset']] 56 | index = [datetime.strptime(entry[0] ,"%Y-%m-%dT%H:%M:%S.%fZ") for entry in result['dataset']] 57 | df = pd.DataFrame(data=data,index=index) 58 | return df 59 | 60 | def _get_multiple_values(self,tickers:list[Ticker],interval:str,values:list[str]=["close"],start_time:datetime|None=None,end_time:datetime|None=None)->dict[str,pd.DataFrame]: 61 | dataframes = {} 62 | for ticker in tickers: 63 | df = self._get_single_value(ticker,interval,values,start_time,end_time) 64 | if df is not None: 65 | dataframes[ticker.ticker] = df 66 | return dataframes 67 | 68 | def get_values(self,tickers:list[Ticker]|Ticker,interval:str,values:list[str]=["close"],start_time:datetime|None=None,end_time:datetime|None=None)->pd.DataFrame|dict[str,pd.DataFrame]|None: 69 | if isinstance(tickers,list): 70 | return self._get_multiple_values(tickers,interval,values,start_time,end_time) 71 | else: 72 | return self._get_single_value(tickers,interval,values,start_time,end_time) 73 | 74 | def remove(self,ticker:str)->bool: 75 | ticker = ticker.upper() 76 | for exchage in self.exchanges: 77 | if ticker in self.exchanges[exchage]: 78 | self.exchanges[exchage].pop(ticker) 79 | return True 80 | return False 81 | 82 | 83 | if __name__ == "__main__": 84 | questClient = QuestClient() 85 | 86 | repo = TickerRepository(questClient) 87 | 88 | ticker1 = Ticker("A","NASDAQ") 89 | ticker2 = Ticker("GOOGL","NASDAQ") 90 | repo.add_ticker(ticker1) 91 | repo.add_ticker(ticker2) 92 | data = repo.get_values([ticker1,ticker2],"1d",values=["close","volume"]) 93 | print(data) 94 | 95 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.9" 2 | 3 | services: 4 | questDB: 5 | container_name: questDB 6 | image: questdb/questdb:latest 7 | profiles: 8 | - database 9 | - stock-scraper 10 | - all 11 | ports: 12 | - "9000:9000" 13 | - "9003:9003" 14 | - "9009:9009" 15 | restart: 16 | unless-stopped 17 | volumes: 18 | - ./storage/questdb:/var/lib/questdb 19 | 20 | mongoDB: 21 | image: mongo 
22 | container_name: mongodb 23 | profiles: 24 | - database 25 | - mongoDB 26 | - all 27 | volumes: 28 | - ./storage/mongodb:/data/db 29 | ports: 30 | - "27017:27017" 31 | restart: 32 | unless-stopped 33 | environment: 34 | MONGO_INITDB_ROOT_USERNAME: ${MONGO_DB_USER} 35 | MONGO_INITDB_ROOT_PASSWORD: ${MONGO_DB_PASDWORD} 36 | networks: 37 | - mongo-network 38 | 39 | mongo-express: 40 | image: mongo-express 41 | container_name: mongo-express 42 | profiles: 43 | - database 44 | - mongoDB 45 | - all 46 | ports: 47 | - "8081:8081" 48 | environment: 49 | ME_CONFIG_MONGODB_ADMINUSERNAME: ${MONGO_DB_USER} 50 | ME_CONFIG_MONGODB_ADMINPASSWORD: ${MONGO_DB_PASDWORD} 51 | ME_CONFIG_MONGODB_URL: mongodb://${MONGO_DB_USER}:${MONGO_DB_PASDWORD}@mongoDB:27017 52 | restart: 53 | unless-stopped 54 | depends_on: 55 | - mongoDB 56 | networks: 57 | - mongo-network 58 | 59 | stock-scraper: 60 | container_name: StockScraper 61 | #build: ./src/stocks/ 62 | image: llukas22/finance-stock-scraper:0.0.4 63 | profiles: 64 | - all 65 | - stock-scraper 66 | tty: true 67 | environment: 68 | - STOCKSCRAPER_QUESTDB_HOST=questdb 69 | - STOCKSCRAPER_QUESTDB_ILP_PORT=9009 #line protocol port 70 | - STOCKSCRAPER_QUESTDB_MONITORING_PORT=9003 #Port for health check 71 | - STOCKSCRAPER_QUESTDB_PORT=9000 #rest port 72 | - STOCKSCRAPER_MODE=Scheduled #Single or Scheduled 73 | - STOCKSCRAPER_TICKERS_DIR=/var/lib/stock-scraper 74 | - STOCKSCRAPER_SLEEPTIME=3600 #1hour in seconds 75 | - STOCKSCRAPER_DEBUG=True #activate debug mode 76 | volumes: 77 | - ./tickers:/var/lib/stock-scraper 78 | restart: 79 | unless-stopped 80 | depends_on: 81 | - questDB 82 | 83 | news-scraper: 84 | container_name: NewsScraper 85 | #build: ./src/news/ 86 | image: llukas22/finance-news-scraper:0.0.4 87 | profiles: 88 | - all 89 | tty: true 90 | environment: 91 | - NEWSSCRAPER_SCRAPE_NEWS=true #scrape news 92 | - NEWSSCRAPER_SENTIMENT_ANALYSIS=true #perfrom sentiment analysis on news (deactivate this if you dont have a container with a gpu) 93 | - NEWSSCRAPER_DOWNLOAD_RSS_FEED=false #Use the rss feeds from NEWSSCRAPER_RSS_DIR 94 | - NEWSSCRAPER_DOWNLOAD_GOOGLE_NEWS=true #Use Google news for the tickers found in STOCKSCRAPER_TICKERS_DIR 95 | - NEWSSCRAPER_DOWNLOAD_FINVIZ_NEWS=false #Use Finviz news for the tickers found in STOCKSCRAPER_TICKERS_DIR 96 | - NEWSSCRAPER_DEBUG=true #activate debug mode 97 | - NEWSSCRAPER_MODE=Scheduled #Single or Scheduled 98 | - NEWSSCRAPER_SENTIMENT_MODE=New #New or All 99 | - NEWSSCRAPER_SLEEPTIME=21600 #6 hours in seconds 100 | - STOCKSCRAPER_TICKERS_DIR=/var/lib/news-scraper/tickers 101 | - NEWSSCRAPER_RSS_DIR=/var/lib/news-scraper/rss 102 | #MongoDB Settings 103 | - NEWSSCRAPER_MONGODB_HOST=mongoDB 104 | - NEWSSCRAPER_MONGODB_PORT=27017 105 | - NEWSSCRAPER_MONGODB_USER=${MONGO_DB_USER} 106 | - NEWSSCRAPER_MONGODB_PASSWORD=${MONGO_DB_PASDWORD} 107 | - NEWSSCRAPER_MONGODB_DBNAME=news 108 | - NEWSSCRAPER_MONGODB_ARTICLE_COLLECTIONNAME=articles 109 | - NEWSSCRAPER_MONGODB_SENTIMENT_COLLECTIONNAME=sentiments 110 | #Sentiment Analysis Settings 111 | - NEWSSCRAPER_SENTIMENT_SEQUENZMODEL=ProsusAI/finbert #Huggingface model to use 112 | - NEWSSCRAPER_SENTIMENT_TOKENIZER=ProsusAI/finbert #Huggingface tokenizer to use 113 | - NEWSSCRAPER_MODEL_DIR=/var/lib/news-scraper/model #Directory where the model is stored 114 | volumes: 115 | - ./tickers:/var/lib/news-scraper/tickers 116 | - ./news:/var/lib/news-scraper/rss 117 | - ./sentiment_model:/var/lib/news-scraper/model 118 | depends_on: 119 | - mongoDB 120 | restart: 121 | unless-stopped 
122 | networks: 123 | - mongo-network 124 | 125 | networks: 126 | mongo-network: 127 | driver: bridge -------------------------------------------------------------------------------- /src/news/src/finance_news_scraper/sentiment.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import torch 3 | from torch.nn import functional as f 4 | from transformers import BertForSequenceClassification, BertTokenizer 5 | import numpy as np 6 | import os 7 | 8 | MAX_LENGTH = 512 9 | START_TOKEN = 101 10 | STOP_TOKEN = 102 11 | PADDING_TOKEN = 0 12 | ACTIVE_MASK = 1 13 | IGNORE_MASK = 0 14 | CLASSES = [-1,0,1] 15 | TOKENIZER_MODEL = os.getenv('NEWSSCRAPER_SENTIMENT_TOKENIZER',"ProsusAI/finbert") 16 | SEQUENZECLASSIFICATION_MODEL = os.getenv('NEWSSCRAPER_SENTIMENT_SEQUENZMODEL',"ProsusAI/finbert") 17 | MODEL_DIR = os.path.abspath(os.getenv('NEWSSCRAPER_MODEL_DIR',"../../../sentiment_model")) 18 | 19 | 20 | class SentimentProvider(object): 21 | tokenizer: BertTokenizer 22 | model: torch.jit._script.RecursiveScriptModule 23 | def __init__(self) -> None: 24 | self.tokenizer = None 25 | self.model = None 26 | os.makedirs(MODEL_DIR,exist_ok=True) 27 | 28 | @property 29 | def is_model_loaded(self)->bool: 30 | return self.model is not None and self.tokenizer is not None 31 | 32 | def load_model(self)->None: 33 | self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 34 | self.tokenizer = BertTokenizer.from_pretrained(TOKENIZER_MODEL) 35 | os.makedirs(MODEL_DIR,exist_ok=True) 36 | if not os.path.isfile(os.path.join(MODEL_DIR,"model.pt")): 37 | #build the torchscript model to gain some speed 38 | logging.info("Building torchscript model ...") 39 | model = BertForSequenceClassification.from_pretrained(SEQUENZECLASSIFICATION_MODEL) 40 | model.eval() 41 | input_ids = torch.rand((1, MAX_LENGTH)).long() 42 | attention_mask = torch.rand((1, MAX_LENGTH)).int() 43 | self.model = torch.jit.trace(model, [input_ids,attention_mask],strict=False) 44 | self.model.save(os.path.join(MODEL_DIR,"model.pt")) 45 | logging.info("Finished torchscript model!") 46 | else: 47 | self.model = torch.jit.load(os.path.join(MODEL_DIR,"model.pt")) 48 | 49 | self.model.eval() 50 | self.model = self.model.to(self.device) 51 | 52 | def dispose_model(self)->None: 53 | del self.model 54 | self.model = None 55 | del self.tokenizer 56 | self.tokenizer = None 57 | 58 | def get_sentiment(self,text:str) -> tuple[int,np.ndarray]: 59 | """ 60 | Computes the class and the probability of the logits for the given text. 61 | """ 62 | if not self.is_model_loaded: 63 | self.load_model() 64 | 65 | with torch.no_grad(): 66 | tokenized = self.encode(text) 67 | prediction = f.softmax(self.model(tokenized['input_ids'],tokenized['attention_mask'])['logits'], dim=-1).mean(dim=0).cpu().numpy() 68 | predicted_class = CLASSES[np.argmax(prediction)] 69 | return predicted_class,prediction 70 | 71 | 72 | def encode(self,text:str): 73 | """ 74 | Uses the tokenizer to build MAX_LENGTH long slices of the text. 
75 | """ 76 | with torch.no_grad(): 77 | tokenized = self.tokenizer.encode_plus(text,add_special_tokens=False,return_tensors="pt") 78 | #to support longer texts we split the sequence and pad it manually => then we pass it to the model 79 | split_length = MAX_LENGTH-2 80 | input_id_chunks = tokenized['input_ids'][0].split(split_length) 81 | mask_chunks = tokenized['attention_mask'][0].split(split_length) 82 | 83 | padded_ids= [] 84 | padded_masks = [] 85 | for i in range(len(input_id_chunks)): 86 | padded_ids.append(torch.cat([torch.Tensor([START_TOKEN]),input_id_chunks[i],torch.Tensor([STOP_TOKEN])])) 87 | padded_masks.append(torch.cat([torch.Tensor([ACTIVE_MASK]),mask_chunks[i],torch.Tensor([ACTIVE_MASK])])) 88 | 89 | for i in range(len(padded_ids)): 90 | padding_length = MAX_LENGTH - len(padded_ids[i]) 91 | if padding_length > 0: 92 | padded_ids[i] = torch.cat([padded_ids[i],torch.Tensor([PADDING_TOKEN]*padding_length)]) 93 | padded_masks[i] = torch.cat([padded_masks[i],torch.Tensor([IGNORE_MASK]*padding_length)]) 94 | 95 | input_ids = torch.stack(padded_ids).long().to(self.device) 96 | attention_mask = torch.stack(padded_masks).int().to(self.device) 97 | return{ 98 | 'input_ids': input_ids, 99 | 'attention_mask': attention_mask 100 | } 101 | 102 | 103 | if __name__ == "__main__": 104 | sentimentProvider = SentimentProvider() 105 | sentimentProvider.load_model() 106 | sentimentProvider.get_sentiment("Hello World "*1000) 107 | -------------------------------------------------------------------------------- /src/stocks/src/finance_stock_scraper/__main__.py: -------------------------------------------------------------------------------- 1 | import time 2 | import os 3 | import pytz 4 | import pandas_market_calendars as mcal 5 | import logging 6 | from datetime import datetime, timedelta 7 | from finance_stock_scraper.ExecutionContext import ExecutionContext 8 | from finance_stock_scraper.QuestClient import QuestClient 9 | from finance_stock_scraper.TickerRepository import TickerRepository 10 | from finance_stock_scraper.YFDataProvider import YFDataProvider 11 | from finance_stock_scraper.model.Ticker import Ticker 12 | from finance_stock_scraper.workflow import gather_data 13 | 14 | 15 | 16 | TICKERS_DIR = os.path.abspath(os.getenv('STOCKSCRAPER_TICKERS_DIR',"../../../Tickers")) 17 | DEBUG = os.getenv('STOCKSCRAPER_DEBUG',"False").upper() == "TRUE" 18 | MODE = os.getenv('STOCKSCRAPER_MODE',"Single").upper() # Single or Scheduled 19 | SLEEP_TIME = int(os.getenv('STOCKSCRAPER_SLEEPTIME',60*60*3)) # 3 hours 20 | 21 | if __name__ == "__main__": 22 | 23 | logging.getLogger("requests").setLevel(logging.WARNING) 24 | logging.getLogger("urllib3").setLevel(logging.WARNING) 25 | 26 | if DEBUG: 27 | logging.basicConfig(format='[%(asctime)s] %(levelname)s - %(message)s',level=logging.DEBUG) 28 | else: 29 | logging.basicConfig(format='[%(asctime)s] %(levelname)s - %(message)s',level=logging.INFO) 30 | 31 | logging.info(f"---Starting Scraper---") 32 | logging.info(f"TICKERS_DIR:{TICKERS_DIR}") 33 | logging.info(f"MODE:{MODE}") 34 | logging.info(f"SLEEP_TIME:{SLEEP_TIME}") 35 | 36 | # Create QuestClient 37 | questClient = QuestClient() 38 | retries = 0 39 | while True: 40 | if questClient.health_check(): 41 | break 42 | else: 43 | logging.warning(f"Could not establish connection to QuestDB! 
Retrying ...") 44 | retries += 1 45 | time.sleep(2) 46 | 47 | if retries > 10: 48 | raise Exception("Could not connect to QuestDB!") 49 | 50 | if not os.path.isdir(TICKERS_DIR): 51 | raise Exception(f"Tickers Directory '{TICKERS_DIR}' does not exist!") 52 | 53 | 54 | ticker_repo = TickerRepository(questClient) 55 | ticker_repo.load_tickers(TICKERS_DIR) 56 | 57 | logging.info("Loaded Tickers:") 58 | for exchange in ticker_repo.exchanges: 59 | logging.info(f"{exchange}:") 60 | logging.info(",".join([ticker for ticker in ticker_repo.exchanges[exchange]])) 61 | 62 | yfDataProvider = YFDataProvider() 63 | 64 | executionContext = ExecutionContext(ticker_repo, yfDataProvider, questClient) 65 | 66 | # if its in single mode, we will run the gathering process once and then exit 67 | if MODE == "SINGLE": 68 | for exchange in ticker_repo.exchanges: 69 | now = datetime.now().astimezone(pytz.utc) 70 | gather_data(exchange, executionContext,now) 71 | else: 72 | # otherwise, we will run the gathering process in a loop 73 | last_runs = {} 74 | while True: 75 | try: 76 | #check all exchanges and if we are are after the tradingtimes we start the gathering process 77 | now = datetime.now().astimezone(pytz.utc) 78 | for exchange in ticker_repo.exchanges: 79 | 80 | #Check if we already run the gathering process for this exchange today 81 | if exchange in last_runs: 82 | last_run = last_runs[exchange] 83 | if now-last_run < timedelta(hours=23,minutes=45): 84 | continue 85 | 86 | calender = mcal.get_calendar(exchange) 87 | schedule = calender.schedule(start_date=now, end_date=now) 88 | if len(schedule) == 0: 89 | #Not a trading day => nothing to fetch 90 | continue 91 | closing_time = schedule["market_close"][0] 92 | #Yahoo Finance can have a delay of 15-30 Minutes for the data to be available => we add 30 minutes to the closing time 93 | closing_time += timedelta(minutes=30) 94 | if now > closing_time: 95 | #we are after the closing time => we start the gathering process 96 | last_runs[exchange] = now 97 | logging.info(f"Starting gathering process for exchange {exchange}...") 98 | gather_data(exchange, executionContext, now) 99 | logging.info(f"Finished gathering process for exchange {exchange}!") 100 | 101 | #sleep for a while 102 | logging.info(f"Sleeping for {SLEEP_TIME} Seconds!") 103 | time.sleep(SLEEP_TIME) 104 | except Exception as e: 105 | logging.error(e) 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | -------------------------------------------------------------------------------- /src/stocks/src/finance_stock_scraper/QuestClient.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime,date 2 | import os 3 | from questdb.ingress import Sender, TimestampNanos, Buffer 4 | import requests 5 | from requests import Response 6 | import pytz 7 | from finance_stock_scraper.model.Ticker import Ticker 8 | from finance_stock_scraper.model.Intervals import INTERVALS, IntervalTypes 9 | 10 | 11 | HOST = os.getenv('STOCKSCRAPER_QUESTDB_HOST','localhost') 12 | INFLUX_LINE_PROTOCOL_PORT = os.getenv('STOCKSCRAPER_QUESTDB_ILP_PORT',9009) 13 | REST_PORT = os.getenv('STOCKSCRAPER_QUESTDB_PORT',9000) 14 | MONITORING_PORT = os.getenv('STOCKSCRAPER_QUESTDB_MONITORING_PORT',9003) 15 | 16 | class QuestClient(object): 17 | def __init__(self,host:str=HOST,port:int=REST_PORT,ilp_port:int=INFLUX_LINE_PROTOCOL_PORT,monitoring_port:int=MONITORING_PORT)-> None: 18 | self.host = host 19 | self.ilp_port = ilp_port 20 | self.port = port 21 | self.monitoring_port = 
monitoring_port 22 | 23 | def health_check(self)-> bool: 24 | try: 25 | return requests.get(f"http://{self.host}:{self.monitoring_port}/status").status_code == 200 26 | except: 27 | return False 28 | 29 | 30 | 31 | def _format_time(self,time:datetime)-> str: 32 | return time.strftime("%Y-%m-%dT%H:%M:%S.%fZ") 33 | 34 | def create_table(self,interval:str)-> None: 35 | if interval not in INTERVALS: 36 | raise Exception(f"Interval {interval} is not supported") 37 | interval_type = INTERVALS[interval] 38 | 39 | query = f"CREATE TABLE IF NOT EXISTS 'interval_{interval}'"\ 40 | "("\ 41 | "exchange Symbol,"\ 42 | "ticker Symbol,"\ 43 | "open float,"\ 44 | "high float,"\ 45 | "low float,"\ 46 | "close float,"\ 47 | "adj_close float,"\ 48 | "volume long,"\ 49 | "timestamp TIMESTAMP"\ 50 | "),"\ 51 | "index (ticker)"\ 52 | "timestamp(timestamp)"\ 53 | f"PARTITION BY {'YEAR' if interval_type == IntervalTypes.Daily else 'MONTH'};" 54 | self.raw_query(query) 55 | 56 | 57 | 58 | 59 | def get_existing_tickers_for_interval(self,interval:str,exchange:str)->list[str]: 60 | query = f"SELECT DISTINCT ticker FROM 'interval_{interval}' WHERE exchange = '{exchange}'" 61 | response = self.raw_query(query) 62 | if response.status_code == 200: 63 | return [ticker[0] for ticker in response.json()['dataset']] 64 | else: 65 | return [] 66 | 67 | 68 | def get_last_entry_dates(self,interval:str)-> dict[str,datetime]: 69 | query = f"SELECT ticker, timestamp FROM 'interval_{interval}'"\ 70 | "LATEST ON timestamp PARTITION BY ticker;" 71 | 72 | response = self.raw_query(query) 73 | last_entries = {} 74 | if response.status_code == 200: 75 | for ticker,time in response.json()['dataset']: 76 | last_entries[ticker] = datetime.strptime(time,"%Y-%m-%dT%H:%M:%S.%fZ").replace(tzinfo=pytz.UTC) 77 | return last_entries 78 | else: 79 | return last_entries 80 | 81 | def store_points(self,buffer:Buffer)-> None: 82 | if len(buffer) > 0: 83 | with Sender(self.host, self.ilp_port) as sender: 84 | sender.flush(buffer) 85 | 86 | def get_data(self,ticker:Ticker,interval:str,values:list[str]=["close"],start_date:datetime|None=None,end_date:datetime|None=None)-> None|dict: 87 | """ 88 | Querry data for the given ticker and interval 89 | """ 90 | selection = ["timestamp"]+values 91 | selection = ",".join(selection) 92 | query = f"SELECT {selection} FROM 'interval_{interval}'" 93 | query += "WHERE " 94 | if end_date and start_date: 95 | query += f"timestamp BETWEEN '{self._format_time(start_date)}' AND '{self._format_time(end_date)}' AND " 96 | elif end_date and not start_date: 97 | query += f"timestamp <= '{self._format_time(end_date)}' AND " 98 | elif start_date and not end_date: 99 | query += f"timestamp >= '{self._format_time(start_date)}' AND " 100 | 101 | query += f"ticker='{ticker.ticker}' AND exchange='{ticker.exchange}';" 102 | 103 | response = self.raw_query(query) 104 | if response.status_code == 200: 105 | return response.json() 106 | return None 107 | 108 | def raw_query(self,query:str)-> Response: 109 | return requests.get(f"http://{self.host}:{self.port}/exec?query=" + requests.utils.quote(query)) 110 | 111 | if __name__ == "__main__": 112 | questClient = QuestClient() 113 | questClient.get_last_entry_dates("1m") 114 | ticker = Ticker("GOOGL","NASDAQ") 115 | result = questClient.get_data(ticker, "1d",values=["close","volume"],start_date=datetime(year = 2010,month=1,day=1),end_date=datetime(year = 2011,month=1,day=1)) -------------------------------------------------------------------------------- 
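Note: the __main__ block in QuestClient.py above only exercises the query path. Below is a minimal sketch of the ingestion path, assuming the questdb Buffer/TimestampNanos API already imported in that module and the same interval_* row layout that workflow.py writes; the specific ticker, prices and volume are hypothetical placeholder values, not data from this repository.

from datetime import datetime
import pytz
from questdb.ingress import Buffer, TimestampNanos
from finance_stock_scraper.QuestClient import QuestClient

# Hypothetical example row; the schema mirrors QuestClient.create_table() / workflow.create_point()
buffer = Buffer()
ts = datetime(2022, 1, 3, 21, 0, tzinfo=pytz.UTC)
buffer.row(
    "interval_1d",  # table created by QuestClient.create_table("1d")
    symbols={"exchange": "NASDAQ", "ticker": "GOOGL"},
    columns={"open": 144.5, "high": 146.0, "low": 143.9, "close": 145.1, "adj_close": 145.1, "volume": 23_000_000},
    at=TimestampNanos(int(ts.timestamp() * 1_000_000_000)),
)

client = QuestClient()
client.store_points(buffer)  # flushes the buffered rows to QuestDB over the ILP port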
/src/news/src/tests/test_mongo_db_client.py: -------------------------------------------------------------------------------- 1 | from multiprocessing.context import assert_spawning 2 | import mongomock 3 | import pymongo 4 | from finance_news_scraper.mongo_client import MongoDBClient 5 | from finance_news_scraper.news_sources import News_Item 6 | from datetime import datetime 7 | from newspaper import Article 8 | import numpy as np 9 | import pytz 10 | import pandas as pd 11 | 12 | @mongomock.patch(servers=(('server.example.com', 27017),)) 13 | def test_client_creates_collections(): 14 | client = MongoDBClient('server.example.com',27017,better_compression=False) 15 | assert client.article_collection is not None 16 | assert client.sentiment_collection is not None 17 | 18 | @mongomock.patch(servers=(('server.example.com', 27017),)) 19 | def test_client_can_build_article(): 20 | client = MongoDBClient('server.example.com',27017,better_compression=False) 21 | news_item = News_Item('publisher', 'link', ["A", "B"], datetime.now()) 22 | article = Article(news_item.link) 23 | article.text = "Test Text" 24 | article.authors = ["Foo","Bar"] 25 | document = client.build_document(news_item,article) 26 | assert document['text'] == "Test Text" 27 | assert document['authors'] == ["Foo","Bar"] 28 | assert document['url'] == news_item.link 29 | assert document['hash'] == news_item.hash 30 | assert document['publisher'] == news_item.publisher 31 | assert document['tickers'] == news_item.tickers 32 | assert document['date'] == news_item.pub_date 33 | 34 | @mongomock.patch(servers=(('server.example.com', 27017),)) 35 | def test_client_can_insert_article(): 36 | client = MongoDBClient('server.example.com',27017,better_compression=False) 37 | news_item = News_Item('publisher', 'link', ["A", "B"], datetime.now()) 38 | article = Article(news_item.link) 39 | article.text = "Test Text" 40 | article.authors = ["Foo","Bar"] 41 | document = client.build_document(news_item,article) 42 | client.insert_article(document) 43 | inserted = client.article_collection.find_one() 44 | assert inserted['hash'] == news_item.hash 45 | 46 | 47 | @mongomock.patch(servers=(('server.example.com', 27017),)) 48 | def test_client_can_find_article_by_hash(): 49 | client = MongoDBClient('server.example.com',27017,better_compression=False) 50 | client.article_collection.insert_one({'hash':'foobar'}) 51 | found = client.find_article_by_hash('foobar') 52 | assert found is not None 53 | 54 | @mongomock.patch(servers=(('server.example.com', 27017),)) 55 | def test_client_can_insert_sentiment(): 56 | client = MongoDBClient('server.example.com',27017,better_compression=False) 57 | now = datetime.now() 58 | client.insert_sentiment((1,np.array([1,2,3])),'foobar',now,['A','B']) 59 | inserted = client.sentiment_collection.find_one() 60 | assert inserted['article_hash'] == 'foobar' 61 | assert inserted['tickers'] == ['A','B'] 62 | assert inserted['sentiment']['class'] == 1 63 | assert inserted['sentiment']['probabilities'] == [1,2,3] 64 | 65 | @mongomock.patch(servers=(('server.example.com', 27017),)) 66 | def test_client_can_get_articles(): 67 | client = MongoDBClient('server.example.com',27017,better_compression=False) 68 | client.article_collection.insert_many([ 69 | {"hash":"A","tickers":["A"]}, 70 | {"hash":"B","tickers":["A","B"]}, 71 | {"hash":"C","tickers":["B"]} 72 | ]) 73 | articles = list(client.get_articles(["A"])) 74 | assert articles is not None 75 | assert len(articles) == 2 76 | 77 | @mongomock.patch(servers=(('server.example.com', 
27017),)) 78 | def test_client_can_get_articles_with_startdate(): 79 | client = MongoDBClient('server.example.com',27017,better_compression=False) 80 | 81 | client.article_collection.insert_many([ 82 | {"hash":"A","tickers":["A"], "date":datetime(2018,1,1)}, 83 | {"hash":"B","tickers":["A"], "date":datetime(2019,1,1)}, 84 | {"hash":"C","tickers":["A"],"date":datetime(2020,1,1)}, 85 | {"hash":"D","tickers":["A"],"date":datetime(2021,1,1)} 86 | ]) 87 | articles = list(client.get_articles(["A"],start=datetime(2019,6,6))) 88 | assert articles is not None 89 | assert len(articles) == 2 90 | assert articles[0]['hash'] == 'D' 91 | assert articles[1]['hash'] == 'C' 92 | 93 | @mongomock.patch(servers=(('server.example.com', 27017),)) 94 | def test_client_can_get_articles_with_enddate(): 95 | client = MongoDBClient('server.example.com',27017,better_compression=False) 96 | 97 | client.article_collection.insert_many([ 98 | {"hash":"A","tickers":["A"], "date":datetime(2018,1,1)}, 99 | {"hash":"B","tickers":["A"], "date":datetime(2019,1,1)}, 100 | {"hash":"C","tickers":["A"],"date":datetime(2020,1,1)}, 101 | {"hash":"D","tickers":["A"],"date":datetime(2021,1,1)} 102 | ]) 103 | articles = list(client.get_articles(["A"],end=datetime(2019,6,6))) 104 | assert articles is not None 105 | assert len(articles) == 2 106 | assert articles[0]['hash'] == 'B' 107 | assert articles[1]['hash'] == 'A' 108 | 109 | @mongomock.patch(servers=(('server.example.com', 27017),)) 110 | def test_client_can_get_articles_with_start_and_enddate(): 111 | client = MongoDBClient('server.example.com',27017,better_compression=False) 112 | 113 | client.article_collection.insert_many([ 114 | {"hash":"A","tickers":["A"], "date":datetime(2018,1,1)}, 115 | {"hash":"B","tickers":["A"], "date":datetime(2019,1,1)}, 116 | {"hash":"C","tickers":["A"],"date":datetime(2020,1,1)}, 117 | {"hash":"D","tickers":["A"],"date":datetime(2021,1,1)}, 118 | {"hash":"E","tickers":["A"],"date":datetime(2022,1,1)} 119 | ]) 120 | articles = list(client.get_articles(["A"],start=datetime(2019,6,6),end=datetime(2021,6,6))) 121 | assert articles is not None 122 | assert len(articles) == 2 123 | assert articles[0]['hash'] == 'D' 124 | assert articles[1]['hash'] == 'C' 125 | 126 | @mongomock.patch(servers=(('server.example.com', 27017),)) 127 | def test_client_can_get_raw_sentiment(): 128 | client = MongoDBClient('server.example.com',27017,better_compression=False) 129 | 130 | client.sentiment_collection.insert_many([ 131 | {"tickers":["A"],"sentiment":{"class":1,'probabilities':[0,0,1] },"date":datetime(2018,1,1)}, 132 | {"tickers":["A"],"sentiment":{"class":1,'probabilities':[0,0,1] },"date":datetime(2019,1,1)}, 133 | {"tickers":["B"],"sentiment":{"class":1,'probabilities':[0,0,1] },"date":datetime(2020,1,1)}, 134 | ]) 135 | articles = list(client.get_raw_sentiments(["A"])) 136 | assert articles is not None 137 | assert len(articles) == 2 138 | 139 | @mongomock.patch(servers=(('server.example.com', 27017),)) 140 | def test_client_can_get_sentiment(): 141 | client = MongoDBClient('server.example.com',27017,better_compression=False) 142 | 143 | client.sentiment_collection.insert_many([ 144 | {"tickers":["A"],"sentiment":{"class":1,'probabilities':[0,0,1] },"date":datetime(2018,1,1)}, 145 | {"tickers":["A"],"sentiment":{"class":1,'probabilities':[0,0,1] },"date":datetime(2019,1,1)}, 146 | {"tickers":["B"],"sentiment":{"class":1,'probabilities':[0,0,1] },"date":datetime(2020,1,1)}, 147 | ]) 148 | df = client.get_sentiments(["A"],frequency='Y',fill_blanks=False) 149 | 
assert isinstance(df,pd.DataFrame) 150 | assert len(df) == 2 151 | 152 | 153 | @mongomock.patch(servers=(('server.example.com', 27017),)) 154 | def test_client_get_sentiment_interpolates_values(): 155 | client = MongoDBClient('server.example.com',27017,better_compression=False) 156 | 157 | client.sentiment_collection.insert_many([ 158 | {"tickers":["A"],"sentiment":{"class":1,'probabilities':[0,0,1] },"date":datetime(2018,1,1)}, 159 | {"tickers":["A"],"sentiment":{"class":0,'probabilities':[0,1,0] },"date":datetime(2019,1,1)}, 160 | {"tickers":["A"],"sentiment":{"class":-1,'probabilities':[1,0,0] },"date":datetime(2020,1,1)}, 161 | ]) 162 | df = client.get_sentiments("A",frequency='D',fill_blanks=True) 163 | assert isinstance(df,pd.DataFrame) 164 | assert len(df) == 731 165 | assert df.isna().sum().sum() == 0 166 | 167 | @mongomock.patch(servers=(('server.example.com', 27017),)) 168 | def test_client_get_sentiment_can_get_2_sentiments(): 169 | client = MongoDBClient('server.example.com',27017,better_compression=False) 170 | 171 | client.sentiment_collection.insert_many([ 172 | {"tickers":["A"],"sentiment":{"class":1,'probabilities':[0,0,1] },"date":datetime(2018,1,1)}, 173 | {"tickers":["A"],"sentiment":{"class":0,'probabilities':[0,1,0] },"date":datetime(2019,1,1)}, 174 | {"tickers":["B"],"sentiment":{"class":-1,'probabilities':[1,0,0] },"date":datetime(2020,1,1)}, 175 | ]) 176 | result = client.get_sentiments(["A","B"],frequency='Y',fill_blanks=False) 177 | assert isinstance(result,dict) 178 | assert len(result) == 2 179 | assert len(result['B']) == 1 180 | assert len(result['A']) == 2 -------------------------------------------------------------------------------- /src/news/src/finance_news_scraper/__main__.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import logging 4 | from finance_news_scraper.mongo_client import MongoDBClient 5 | from tqdm import tqdm 6 | from newspaper import Article 7 | from newspaper.article import ArticleException 8 | from finance_news_scraper.sentiment import SentimentProvider 9 | from finance_news_scraper.news_sources import get_rss_items,get_finviz_news_items,get_google_news_items 10 | from newspaper import Config 11 | import pandas as pd 12 | from finance_news_scraper.news_sources import News_Item 13 | from tqdm.contrib.concurrent import thread_map 14 | import time 15 | 16 | RSS_DIR = os.path.abspath(os.getenv('NEWSSCRAPER_RSS_DIR',"../../../news")) 17 | TICKERS_DIR = os.path.abspath(os.getenv('STOCKSCRAPER_TICKERS_DIR',"../../../Tickers")) 18 | DEBUG = os.getenv('NEWSSCRAPER_DEBUG',"True").upper() == "TRUE" 19 | MODE = os.getenv('NEWSSCRAPER_MODE',"Single").upper() # Single or Scheduled 20 | SLEEP_TIME = int(os.getenv('NEWSSCRAPER_SLEEPTIME',60*60*6)) # 6 hours 21 | SENTIMENT_MODE = os.getenv('NEWSSCRAPER_SENTIMENT_MODE',"ALL").upper() # ALL or NEW 22 | PERFORM_SENTIMENT_ANALYSIS = os.getenv('NEWSSCRAPER_SENTIMENT_ANALYSIS',"TRUE").upper() == "TRUE" 23 | PERFORM_NEWS_SCRAPING = os.getenv('NEWSSCRAPER_SCRAPE_NEWS',"TRUE").upper() == "TRUE" 24 | DOWNLOAD_RSS_FEED = os.getenv('NEWSSCRAPER_DOWNLOAD_RSS_FEED',"FALSE").upper() == "TRUE" 25 | DOWNLOAD_GOOGLE_NEWS = os.getenv('NEWSSCRAPER_DOWNLOAD_GOOGLE_NEWS',"TRUE").upper() == "TRUE" 26 | DOWNLOAD_FINVIZ_NEWS = os.getenv('NEWSSCRAPER_DOWNLOAD_FINVIZ_NEWS',"FALSE").upper() == "TRUE" 27 | 28 | USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:103.0) Gecko/20100101 Firefox/103.0' 29 | JS_ERROR_TEXTS = [ 30 | "please enable Javascript", 
31 | "Javascript is Disabled" 32 | ] 33 | 34 | if __name__ == "__main__": 35 | 36 | logging.getLogger("requests").setLevel(logging.WARNING) 37 | logging.getLogger("urllib3").setLevel(logging.WARNING) 38 | logging.getLogger("huggingface").setLevel(logging.WARNING) 39 | logging.getLogger("newspaper").setLevel(logging.WARNING) 40 | logging.getLogger("transformers").setLevel(logging.WARNING) 41 | logging.getLogger("httpx").setLevel(logging.WARNING) 42 | logging.getLogger("asyncio").setLevel(logging.WARNING) 43 | 44 | if DEBUG: 45 | logging.basicConfig(format='[%(asctime)s] %(levelname)s - %(message)s',level=logging.DEBUG) 46 | else: 47 | logging.basicConfig(format='[%(asctime)s] %(levelname)s - %(message)s',level=logging.INFO) 48 | 49 | logging.info(f"RSS_DIR:{RSS_DIR}") 50 | logging.info(f"TICKERS_DIR:{TICKERS_DIR}") 51 | 52 | os.makedirs(RSS_DIR,exist_ok=True) 53 | os.makedirs(TICKERS_DIR,exist_ok=True) 54 | 55 | rss_feeds = None 56 | rss_file = os.path.join(RSS_DIR,"rss-feeds.json") 57 | if os.path.isfile(rss_file): 58 | with open(rss_file) as f: 59 | rss_feeds = json.load(f) 60 | 61 | files = [file for file in os.listdir(TICKERS_DIR) if file.endswith(".csv")] 62 | logging.info(f"Found Ticker files: {','.join(files)}") 63 | 64 | 65 | tickers=[] 66 | for file in files: 67 | file = os.path.join(TICKERS_DIR,file) 68 | exchange = os.path.basename(file).split('.')[0].upper() 69 | local_tickers = pd.read_csv(file) 70 | for ticker in local_tickers.values: 71 | if ticker[0] is not None and isinstance(ticker[0],str): 72 | tickers.append((ticker[0],ticker[1],ticker[2])) 73 | 74 | config = Config() 75 | config.browser_user_agent = USER_AGENT 76 | config.request_timeout = 7 77 | config.fetch_images = False 78 | 79 | mongoClient = MongoDBClient() 80 | sentimentProvider = SentimentProvider() 81 | 82 | while True: 83 | news_items:list[News_Item] = [] 84 | 85 | if PERFORM_NEWS_SCRAPING: 86 | 87 | if DOWNLOAD_GOOGLE_NEWS and len(tickers) > 0: 88 | news_items += get_google_news_items(tickers[:10]) 89 | 90 | if DOWNLOAD_FINVIZ_NEWS and len(tickers) > 0: 91 | news_items += get_finviz_news_items(tickers) 92 | 93 | if DOWNLOAD_RSS_FEED and rss_feeds: 94 | for publisher,feed in tqdm(rss_feeds.items(),desc="RSS Feeds"): 95 | news_items += get_rss_items(feed,publisher) 96 | 97 | #Group by site hash 98 | grouped_news_items = {} 99 | for news_item in news_items: 100 | if news_item.hash in grouped_news_items: 101 | grouped_news_items[news_item.hash].append(news_item) 102 | else: 103 | grouped_news_items[news_item.hash] = [news_item] 104 | 105 | cleaned_news_items = [] 106 | 107 | #Collaps duplicates into a single item 108 | for hash,news_items in grouped_news_items.items(): 109 | news_item = news_items[0] 110 | tickers = [] 111 | for item in news_items: 112 | if item.tickers: 113 | tickers += item.tickers 114 | tickers = list(set(tickers)) 115 | news_item.tickers = tickers 116 | cleaned_news_items.append(news_item) 117 | 118 | def download_news_items(news_items:list[News_Item])->list[str]: 119 | 120 | hashes = [] 121 | to_store = [] 122 | for news_item in news_items: 123 | try: 124 | existing_entry = mongoClient.find_article_by_hash(news_item.hash) 125 | if existing_entry: 126 | #if needed update the tickers in the entry 127 | if mongoClient.update_article_tickers(existing_entry["_id"],existing_entry["tickers"],news_item.tickers): 128 | hashes.append(news_item.hash) 129 | continue 130 | 131 | article = Article(news_item.link,language="en",config=config, fetch_images=False) 132 | article.download() 133 | 
article.parse() 134 | 135 | # skip pages that only returned a JavaScript-required notice instead of the article text 136 | if any(js_error_text in article.text for js_error_text in JS_ERROR_TEXTS): 137 | logging.debug(f"JS ERROR: {news_item.link}") 138 | failed_downloads.append(news_item.link) 139 | continue 140 | 141 | to_store.append(mongoClient.build_document(news_item,article)) 142 | hashes.append(news_item.hash) 143 | 144 | except ArticleException as articleException: 145 | logging.debug(f"ArticleError: {articleException}") 146 | failed_downloads.append(news_item.link) 147 | 148 | except Exception as e: 149 | logging.error(f"{e}") 150 | continue 151 | 152 | mongoClient.insert_articles(to_store) 153 | return hashes 154 | 155 | 156 | hashes = [] 157 | failed_downloads = [] 158 | logging.info(f"Found {len(cleaned_news_items)} articles!") 159 | if len(cleaned_news_items) > 0: 160 | list_of_hashes = [] 161 | 162 | def chunks(lst, n): 163 | """Yield successive n-sized chunks from lst.""" 164 | for i in range(0, len(lst), n): 165 | yield lst[i:i + n] 166 | 167 | def flatten(l): 168 | return [item for sublist in l for item in sublist] 169 | 170 | list_of_hashes = thread_map(download_news_items,list(chunks(cleaned_news_items,25)),max_workers=16,desc="Downloading News Items") 171 | hashes = flatten(list_of_hashes) 172 | 173 | hashes = set([hash for hash in list(set(hashes)) if hash]) 174 | logging.info(f"Failed {len(failed_downloads)} Downloads!") 175 | logging.info(f"Stored {len(hashes)} items!") 176 | 177 | #Sentiment Analysis 178 | if PERFORM_SENTIMENT_ANALYSIS: 179 | logging.info(f"Start Sentiment Analysis") 180 | 181 | def analyse_article(hash:str): 182 | try: 183 | article = mongoClient.find_article_by_hash(hash) 184 | if article: 185 | existing_sentiment = mongoClient.find_sentiment_by_hash(hash) 186 | if existing_sentiment: 187 | #if needed update the tickers in the entry 188 | mongoClient.update_sentiment_tickers(existing_sentiment["_id"],existing_sentiment["tickers"],article["tickers"]) 189 | return 190 | 191 | sentiment = sentimentProvider.get_sentiment(article["text"]) 192 | mongoClient.insert_sentiment(sentiment,hash,article["date"],article["tickers"]) 193 | except Exception as e: 194 | logging.error(f"{e}") 195 | 196 | sentiments_to_calculate = [] 197 | 198 | if SENTIMENT_MODE == "ALL": 199 | #Get all Articles that have no sentiment 200 | sentiments_to_calculate = mongoClient.get_all_article_hashes().difference(mongoClient.get_all_sentiment_hashes()) 201 | else: 202 | #Only process the new articles 203 | sentiments_to_calculate = hashes 204 | 205 | if len(sentiments_to_calculate) > 0: 206 | for hash in tqdm(sentiments_to_calculate,desc="Sentiment Analysis"): 207 | analyse_article(hash) 208 | logging.info(f"Finished Sentiment Analysis!") 209 | 210 | if MODE == "SINGLE": 211 | break 212 | 213 | logging.info(f"Sleeping for {SLEEP_TIME} seconds ...") 214 | sentimentProvider.dispose_model() 215 | time.sleep(SLEEP_TIME) -------------------------------------------------------------------------------- /src/stocks/src/finance_stock_scraper/workflow.py: -------------------------------------------------------------------------------- 1 | from finance_stock_scraper.ExecutionContext import ExecutionContext 2 | from finance_stock_scraper.model.Intervals import INTERVALS,IntervalTypes 3 | from finance_stock_scraper.model.Ticker import Ticker 4 | import pytz 5 | import pandas as pd 6 | import os 7 | import logging 8 | import traceback 9 | from tqdm import tqdm 10 | import ctypes 11 | import numpy as np 12 | from questdb.ingress import TimestampNanos, Buffer 13 | from datetime import
datetime,date,timedelta 14 | 15 | CONFIGURED_INTERVALS = os.getenv("STOCKSCRAPER_INTERVALS","5m,1d").split(",") 16 | FLUX_PROTOCOL_MAX_INT = 2_147_483_647 #In theory this should be the int64 max but flux-line-protocol in quest db only supports up to int32 17 | 18 | def get_interval(interval:str)->IntervalTypes: 19 | if interval not in INTERVALS: 20 | raise ValueError(f"Unknown interval {interval}") 21 | return INTERVALS[interval] 22 | 23 | def interval_to_timedelta(interval:str)->timedelta: 24 | match interval: 25 | case "1m": 26 | return timedelta(minutes=1) 27 | case "2m": 28 | return timedelta(minutes=2) 29 | case "5m": 30 | return timedelta(minutes=5) 31 | case "15m": 32 | return timedelta(minutes=15) 33 | case "30m": 34 | return timedelta(minutes=30) 35 | case "60m": 36 | return timedelta(hours=1) 37 | case "90m": 38 | return timedelta(hours=1,minutes=30) 39 | case "1h": 40 | return timedelta(hours=1) 41 | case "1d": 42 | return timedelta(days=1) 43 | case "5d": 44 | return timedelta(days=5) 45 | case "1wk": 46 | return timedelta(weeks=1) 47 | case "1mo": 48 | return timedelta(days=30) 49 | case "3mo": 50 | return timedelta(days=90) 51 | case _: 52 | raise ValueError(f"Unknown interval {interval}") 53 | 54 | def difference(existing:list[str],tickers:list[Ticker])->list[Ticker]: 55 | return [ticker for ticker in tickers if ticker.ticker not in existing] 56 | 57 | def make_datetime_tz_aware(datetime:datetime): 58 | if datetime.tzinfo is not None and datetime.tzinfo.utcoffset(datetime) is not None: 59 | return datetime.astimezone(pytz.UTC) 60 | else: 61 | return pytz.utc.localize(datetime) 62 | 63 | def make_timestamp_tz_aware(timestamp:pd.Timestamp): 64 | if timestamp.tzinfo is not None and timestamp.tzinfo.utcoffset(timestamp) is not None: 65 | return timestamp.tz_convert(pytz.UTC) 66 | else: 67 | return timestamp.tz_localize(pytz.UTC) 68 | 69 | def create_point(timestamp:pd.Timestamp,row:pd.Series,ticker:Ticker,interval:str,buffer:Buffer,minimal_date:datetime=datetime(1, 1, 1))->bool: 70 | 71 | #Invalide Data skip this row 72 | if row.isnull().values.any(): 73 | return False 74 | 75 | timestamp = make_timestamp_tz_aware(timestamp) 76 | 77 | if timestamp.value <= 0 or timestamp <= minimal_date: 78 | return False 79 | 80 | buffer.row( 81 | f"interval_{interval}", 82 | symbols={ 83 | "exchange":ticker.exchange, 84 | "ticker":ticker.ticker 85 | }, 86 | columns={ 87 | "open":float(row["Open"]), 88 | "high":float(row["High"]), 89 | "low":float(row["Low"]), 90 | "close":float(row["Close"]), 91 | "adj_close":float(row["Adj Close"]), 92 | "volume": min(int(row["Volume"]),FLUX_PROTOCOL_MAX_INT) 93 | }, 94 | at=TimestampNanos(timestamp.value) 95 | ) 96 | return True 97 | 98 | def gather_data(exchange:str,executionContext:ExecutionContext,now:datetime=datetime.now()): 99 | """ 100 | Syncs the data of the given Exchange with the database 101 | """ 102 | for interval in CONFIGURED_INTERVALS: 103 | try: 104 | logging.info(f"Starting {exchange} - {interval}") 105 | executionContext.questClient.create_table(interval) 106 | flow(exchange,interval,executionContext,now) 107 | except Exception as e: 108 | logging.error(e) 109 | logging.debug(traceback.format_exc()) 110 | finally: 111 | logging.info(f"Finished {exchange} - {interval}") 112 | 113 | 114 | 115 | def download_in_slices(tickers:list[Ticker],interval:str,exchange:str,start:datetime,stop:datetime,executionContext:ExecutionContext,include_start:bool=True,slice_size:int=6)->pd.DataFrame: 116 | """ 117 | Some intraday data can only be downladed in 
slices of 6 days at a time => we have to download in slices if we want to pull the last 30 days 118 | """ 119 | dif = (stop-start).days 120 | offsets = list(range(0,dif,slice_size)) 121 | if dif not in offsets: 122 | offsets.append(dif) 123 | 124 | for i,offset in enumerate(offsets[:-1]): 125 | local_start = start+timedelta(days=offset) 126 | local_end = start+timedelta(days=offsets[i+1]) 127 | data,errors = executionContext.yfDataProcider.get_data([ticker.ticker for ticker in tickers],local_start,local_end,interval) 128 | handle_errors(errors,executionContext) 129 | #for many tickers (> 10,000) we get a lot of data (> 10GB) => we need to commit it to the database in slices 130 | if data is not None: 131 | store_points(data,tickers,f"Intraday Tickers (Slice {i+1}/{len(offsets[:-1])})",executionContext,interval,exchange,datetime(1, 1, 1) if include_start else start) 132 | else: 133 | joined_tickers = ",".join(ticker.ticker for ticker in tickers) 134 | logging.warning(f"Could not download data for {joined_tickers} from {local_start} to {local_end}") 135 | 136 | 137 | def store_points(data:pd.DataFrame,tickers:list[Ticker],message:str,executionContext:ExecutionContext,interval:str,exchange:str,minimal_date:datetime=datetime(1, 1, 1))->None: 138 | logging.info(f"[{message}] Storing Points ({interval}) for exchange {exchange} ...") 139 | minimal_date = make_datetime_tz_aware(minimal_date) 140 | stored_points = 0 141 | current_iteration = 0 142 | buffer = Buffer(init_capacity=1024*1024) 143 | for ticker in tqdm(tickers,f"[{message}] Storing Points ({interval}) for exchange {exchange} ..."): 144 | if ticker.ticker in data.columns: 145 | try: 146 | for timestamp,row in data[ticker.ticker].iterrows(): 147 | if create_point(timestamp,row,ticker,interval,buffer,minimal_date): 148 | current_iteration += 1 149 | 150 | if current_iteration > 30_000: 151 | executionContext.questClient.store_points(buffer) 152 | stored_points += current_iteration 153 | current_iteration = 0 154 | except Exception as e: 155 | logging.error(e) 156 | logging.debug(traceback.format_exc()) 157 | 158 | if current_iteration > 0: 159 | executionContext.questClient.store_points(buffer) 160 | stored_points += current_iteration 161 | current_iteration = 0 162 | 163 | logging.info(f"[{message}] Stored {stored_points} Points ({interval}) for exchange {exchange}!") 164 | 165 | 166 | 167 | def handle_errors(errors:dict,executionContext:ExecutionContext): 168 | """ 169 | Handles the yFinance errors and removes faulty tickers from the repository 170 | """ 171 | if len(errors)>0: 172 | logging.warning(f"{len(errors)} errors occurred") 173 | #TODO 174 | # removed = [] 175 | # for ticker,error in errors.items(): 176 | # if error == "No data found for this date range, symbol may be delisted": 177 | # if executionContext.tickerRepository.remove(ticker): 178 | # removed.append(ticker) 179 | # if len(removed) > 0: 180 | # logging.debug(f"Removed {','.join(removed)} from the repository!") 181 | 182 | 183 | def flow(exchange:str,interval:str,executionContext:ExecutionContext,now:datetime): 184 | """ 185 | Flow to download data for a given exchange and interval 186 | """ 187 | interval_type = get_interval(interval) 188 | time_delta = interval_to_timedelta(interval) 189 | 190 | tickers = list(executionContext.tickerRepository.exchanges[exchange].values()) 191 | if len(tickers) == 0: 192 | raise ValueError(f"No tickers found for exchange {exchange}") 193 | 194 | #First we check if the ticker is in the database; if not, we download the max from YFinance and add it 195 |
existing_tickers = executionContext.questClient.get_existing_tickers_for_interval(interval,exchange) 196 | tickers_to_gather = difference(existing_tickers,tickers) 197 | if len(tickers_to_gather) > 0: 198 | data = None 199 | if interval_type == IntervalTypes.Daily: 200 | data,errors = executionContext.yfDataProcider.get_data_from_period([ticker.ticker for ticker in tickers_to_gather],interval) 201 | handle_errors(errors,executionContext) 202 | if data is not None: 203 | store_points(data,tickers_to_gather,"New Tickers",executionContext,interval,exchange) 204 | else: 205 | #Maximum for Intraday is 30 days 206 | download_in_slices(tickers_to_gather,interval,exchange,now-timedelta(days=29),now,executionContext,) 207 | 208 | #If we already have data for a stock we just download the latest data 209 | #1. We ignore the stocks we just downloaded 210 | tickers_to_check = difference([ticker.ticker for ticker in tickers_to_gather],tickers) 211 | 212 | #2. Query the database for the last date we got data for each stock and group by datetime 213 | tickers_to_gather = {} 214 | last_entries = executionContext.questClient.get_last_entry_dates(interval) 215 | for ticker in tickers_to_check: 216 | if ticker.ticker in last_entries: 217 | last_date = last_entries[ticker.ticker] 218 | #this should never happen 219 | if last_date == now: 220 | continue 221 | 222 | if last_date not in tickers_to_gather: 223 | tickers_to_gather[last_date] = [ticker] 224 | else: 225 | tickers_to_gather[last_date].append(ticker) 226 | else: 227 | logging.warning(f"Can't find last date for {ticker.ticker}!") 228 | 229 | #3. Download the data from YFinance 230 | for date in tickers_to_gather: 231 | if now-date < time_delta: 232 | #We can't download data from the future (e.g. for a 7-day interval we can only download it 7 days after the last date) 233 | continue 234 | 235 | batched_tickers = tickers_to_gather[date] 236 | batched_tickers_names = [ticker.ticker for ticker in batched_tickers] 237 | data = None 238 | 239 | if interval_type == IntervalTypes.Intraday and now-date > timedelta(days=6): 240 | if now-date > timedelta(days=30): 241 | #we can only get the last 30 days 242 | logging.warning(f"The last entry for {','.join(batched_tickers_names)} is older than 30 days!
Only the last 30 days will be downloaded!") 243 | download_in_slices(batched_tickers,interval,exchange,now-timedelta(days=29),now,executionContext,include_start=False) 244 | else: 245 | #we have to download in slices 246 | download_in_slices(batched_tickers,interval,exchange,date,now,executionContext,include_start=False) 247 | else: 248 | data,errors = executionContext.yfDataProcider.get_data(batched_tickers_names,date,now,interval) 249 | handle_errors(errors,executionContext) 250 | if data is not None: 251 | store_points(data,batched_tickers,"Existing Tickers",executionContext,interval,exchange,date) 252 | -------------------------------------------------------------------------------- /src/news/src/finance_news_scraper/mongo_client.py: -------------------------------------------------------------------------------- 1 | import pymongo 2 | import os 3 | from finance_news_scraper.news_sources import News_Item 4 | from newspaper import Article 5 | import numpy as np 6 | from pymongo import ASCENDING, DESCENDING 7 | from pymongo.typings import _CollationIn, _DocumentIn, _DocumentType, _Pipeline 8 | from pymongo.cursor import Cursor 9 | from typing import Optional 10 | from datetime import datetime 11 | import pandas as pd 12 | 13 | HOST = os.getenv('NEWSSCRAPER_MONGODB_HOST',"localhost") 14 | PORT = int(os.getenv('NEWSSCRAPER_MONGODB_PORT',"27017")) 15 | USERNAME = os.getenv('NEWSSCRAPER_MONGODB_USERNAME',"admin") 16 | PASSWORD = os.getenv('NEWSSCRAPER_MONGODB_PASSWORD',"asda2sdqw12e4asfd") 17 | DB_NAME = os.getenv('NEWSSCRAPER_MONGODB_DBNAME',"news") 18 | ARTICLE_COLLECTION_NAME = os.getenv('NEWSSCRAPER_MONGODB_ARTICLE_COLLECTIONNAME',"articles") 19 | SENTIMENT_COLLECTION_NAME = os.getenv('NEWSSCRAPER_MONGODB_SENTIMENT_COLLECTIONNAME',"sentiments") 20 | 21 | class MongoDBClient(object): 22 | def __init__(self,host:str=HOST,port:int=PORT,username:str=USERNAME,password:str=PASSWORD,better_compression=True) -> None: 23 | self.client = pymongo.MongoClient(host=host, port=port, username=username, password=password) 24 | 25 | #create the db and collections with indexes 26 | self.db = self.client[DB_NAME] 27 | collections = self.db.list_collection_names() 28 | if ARTICLE_COLLECTION_NAME not in collections: 29 | if better_compression: 30 | self.db.create_collection(ARTICLE_COLLECTION_NAME,storageEngine={"wiredTiger": {"configString": "block_compressor=zstd"}}) 31 | else: 32 | self.db.create_collection(ARTICLE_COLLECTION_NAME) 33 | self.article_collection = self.db[ARTICLE_COLLECTION_NAME] 34 | self.article_collection.create_index([("date",DESCENDING)],background=True) 35 | self.article_collection.create_index([("hash",ASCENDING)],background=True) 36 | else: 37 | self.article_collection = self.db[ARTICLE_COLLECTION_NAME] 38 | 39 | 40 | if SENTIMENT_COLLECTION_NAME not in collections: 41 | if better_compression: 42 | self.db.create_collection(SENTIMENT_COLLECTION_NAME,storageEngine={"wiredTiger": {"configString": "block_compressor=zstd"}}) 43 | else: 44 | self.db.create_collection(SENTIMENT_COLLECTION_NAME) 45 | self.sentiment_collection = self.db[SENTIMENT_COLLECTION_NAME] 46 | self.sentiment_collection.create_index([("date",DESCENDING)],background=True) 47 | self.sentiment_collection.create_index([("article_hash",ASCENDING)],background=True) 48 | else: 49 | self.sentiment_collection = self.db[SENTIMENT_COLLECTION_NAME] 50 | 51 | 52 | def insert_sentiment(self,sentiment:tuple[int,np.ndarray],hash:str,datetime:datetime,tickers:list[str])->None: 53 | data={ 54 | 'article_hash':hash, 55 | 
'tickers':tickers, 56 | 'date':datetime, 57 | 'sentiment':{ 58 | 'class':sentiment[0], 59 | 'probabilities':sentiment[1].tolist() 60 | } 61 | } 62 | self.sentiment_collection.insert_one(data) 63 | 64 | def build_document(self,item:News_Item,article:Article)->dict: 65 | return { 66 | 'url': item.link, 67 | 'hash': item.hash, 68 | 'text': article.text, 69 | 'authors': article.authors, 70 | 'tickers': item.tickers, 71 | 'date': item.pub_date, 72 | 'publisher': item.publisher, 73 | } 74 | 75 | def insert_article(self,data:dict)->None: 76 | self.article_collection.insert_one(data) 77 | 78 | def insert_articles(self,articles:list[dict])->None: 79 | if len(articles) > 0: 80 | self.article_collection.insert_many(articles) 81 | 82 | def find_article_by_hash(self,hash:str)->Optional[_DocumentType]: 83 | """ 84 | Returns an article if it exists in the database 85 | """ 86 | return self.article_collection.find_one({'hash':hash}) 87 | 88 | def find_sentiment_by_hash(self,hash:str)->Optional[_DocumentType]: 89 | """ 90 | Returns a sentiment if it exists in the database 91 | """ 92 | return self.sentiment_collection.find_one({'article_hash':hash}) 93 | 94 | def get_all_article_hashes(self)->set[str]: 95 | return set(self.article_collection.distinct('hash')) 96 | 97 | def get_all_sentiment_hashes(self)->set[str]: 98 | return set(self.sentiment_collection.distinct('article_hash')) 99 | 100 | def __update_tickers(self,collection,_id:_DocumentIn,old_tickers:list,new_tickers:list)->bool: 101 | tickers=[] 102 | needs_update = False 103 | 104 | if new_tickers: 105 | if not old_tickers: 106 | needs_update = True 107 | tickers = list(set(new_tickers)) 108 | else: 109 | if not set(old_tickers) == set(new_tickers): 110 | needs_update = True 111 | tickers = list(set(old_tickers+new_tickers)) 112 | 113 | if needs_update: 114 | collection.update_one({'_id':_id},{'$set':{'tickers':tickers}}) 115 | return True 116 | return False 117 | 118 | def update_sentiment_tickers(self,_id:_DocumentIn,old_tickers:list,new_tickers:list)->bool: 119 | """ 120 | Updates the tickers of an sentiment if needed 121 | """ 122 | return self.__update_tickers(self.sentiment_collection,_id,old_tickers,new_tickers) 123 | 124 | 125 | def update_article_tickers(self,_id:_DocumentIn,old_tickers:list,new_tickers:list)->bool: 126 | """ 127 | Updates the tickers of an article if needed 128 | """ 129 | return self.__update_tickers(self.article_collection,_id,old_tickers,new_tickers) 130 | 131 | 132 | def __get_by_tickers_and_date(self,collection,tickers:list[str],start:datetime=None,end:datetime=None)->Cursor[_DocumentType]: 133 | """ 134 | Find all documents that match the tickers and the date range 135 | """ 136 | 137 | tickers = list(ticker.upper() for ticker in set(tickers)) 138 | if len(tickers) < 1: 139 | raise Exception("A ticker must be provided!") 140 | 141 | if start and end: 142 | result = collection.find({ 'tickers': { '$in': tickers },'date': {'$gte': start,'$lt': end}}) 143 | elif start and not end: 144 | result = collection.find({ 'tickers': { '$in': tickers },'date': {'$gte': start}}) 145 | elif end and not start: 146 | result = collection.find({ 'tickers': { '$in': tickers },'date': {'$lt': end}}) 147 | else: 148 | result = collection.find({ 'tickers': { '$in': tickers }}) 149 | return result.sort('date',DESCENDING) 150 | 151 | 152 | def get_articles(self,tickers:list[str],start:datetime=None,end:datetime=None)->Cursor[_DocumentType]: 153 | """ 154 | Find all articles that match the tickers and the date range 155 | """ 156 | return 
self.__get_by_tickers_and_date(self.article_collection,tickers,start,end) 157 | 158 | def get_raw_sentiments(self,tickers:list[str],start:datetime=None,end:datetime=None)->Cursor[_DocumentType]: 159 | """ 160 | Find all sentiments by tickers and the date range 161 | """ 162 | return self.__get_by_tickers_and_date(self.sentiment_collection,tickers,start,end) 163 | 164 | 165 | 166 | def __build_sentiment_dataframe(self,sentiments:list[dict])->pd.DataFrame: 167 | pd_data = [] 168 | for sentiment in sentiments: 169 | pd_data.append({"date":sentiment["date"],"sentiment":sentiment["sentiment"]["class"],"certainty":max(sentiment["sentiment"]["probabilities"])}) 170 | return pd.DataFrame(pd_data) 171 | 172 | 173 | 174 | def _get_weighted_sentiment(self,group:pd.DataFrame) -> float: 175 | if 'sentiment' in group.columns and 'certainty' in group.columns: 176 | return (group['sentiment'] * group['certainty']).mean() 177 | else: 178 | return np.nan 179 | 180 | 181 | 182 | def _raw_sentiment_to_dataframe(self, 183 | sentiments:list[dict], 184 | frequency:str="d", 185 | fill_blanks:bool=True, 186 | interpolate_values:bool=True, 187 | interpolation:str='linear')->pd.DataFrame: 188 | #create df 189 | df = self.__build_sentiment_dataframe(sentiments=sentiments) 190 | #set the period and mean the sentiment 191 | df = (df 192 | .groupby(df['date'].dt.to_period(frequency).dt.start_time) 193 | .apply(lambda x: self._get_weighted_sentiment(x)) 194 | .reset_index(name='sentiment') 195 | .set_index('date')) 196 | 197 | #create the times we don't have data for 198 | if fill_blanks: 199 | df = df.asfreq(frequency) 200 | 201 | #interpolate all NaN values 202 | if interpolate_values: 203 | df = df.interpolate(method=interpolation) 204 | 205 | return df 206 | 207 | 208 | def get_sentiments(self,tickers:str|list[str], 209 | start:datetime=None, 210 | end:datetime=None, 211 | frequency:str="d", 212 | fill_blanks:bool=True, 213 | interpolate_values:bool=True, 214 | interpolation:str='linear')->dict[str,pd.DataFrame]|pd.DataFrame: 215 | 216 | """ 217 | Retrieves the mean sentiment for the tickers and date range and returns a dataframe for each ticker. 218 | 219 | 'frequency': the frequency used to group the data. Can be 'd' for daily, 'w' for weekly, 'm' for monthly. For more options see here: https://pandas.pydata.org/docs/user_guide/timeseries.html#timeseries-offset-aliases 220 | 221 | 'fill_blanks': if True, the dataframe will be expanded to match the provided frequency. All created rows are initialized with NaN. 222 | 223 | 'interpolate_values': if True, all NaN values will be interpolated using the interpolation method. 224 | 225 | 'interpolation': Interpolation method. Can be 'linear' or another method.
See here: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.interpolate.html 226 | """ 227 | if tickers is None: 228 | raise Exception("A ticker must be provided!") 229 | 230 | if isinstance(tickers,str): 231 | tickers = [tickers] 232 | 233 | grouped_sentiments = {} 234 | data = self.get_raw_sentiments(tickers,start,end) 235 | ticker_set = set(tickers) 236 | 237 | #init the grouped sentiments 238 | for ticker in ticker_set: 239 | grouped_sentiments[ticker] = [] 240 | 241 | for sentiment in data: 242 | #get all tickers that match this sentiment 243 | sentiment_tickers = set(sentiment['tickers']) 244 | for matching in ticker_set & sentiment_tickers: 245 | grouped_sentiments[matching].append(sentiment) 246 | 247 | #build the dataframes 248 | 249 | dataframes = {} 250 | for ticker in ticker_set: 251 | df = self._raw_sentiment_to_dataframe(list(grouped_sentiments[ticker]),frequency,fill_blanks,interpolate_values,interpolation) 252 | dataframes[ticker] = df 253 | 254 | if len(dataframes) == 1: 255 | return next(iter(dataframes.values())) 256 | return dataframes 257 | 258 | 259 | 260 | -------------------------------------------------------------------------------- /tickers/nasdaq.csv: -------------------------------------------------------------------------------- 1 | tickers,shortNames,longNames 2 | BEN,"Franklin Resources, Inc.","Franklin Resources, Inc." 3 | CI,Cigna Corporation,Cigna Corporation 4 | CNC,Centene Corporation,Centene Corporation 5 | FRC,FIRST REPUBLIC BANK,First Republic Bank 6 | BXP,"Boston Properties, Inc.","Boston Properties, Inc." 7 | CMG,"Chipotle Mexican Grill, Inc.","Chipotle Mexican Grill, Inc." 8 | CFG,"Citizens Financial Group, Inc.","Citizens Financial Group, Inc." 9 | COST,Costco Wholesale Corporation,Costco Wholesale Corporation 10 | XYL,Xylem Inc.,Xylem Inc. 11 | T,AT&T Inc.,AT&T Inc. 12 | META,"Meta Platforms, Inc.","Meta Platforms, Inc." 13 | DXCM,"DexCom, Inc.","DexCom, Inc." 14 | BR,"Broadridge Financial Solutions,","Broadridge Financial Solutions, Inc." 15 | UNH,UnitedHealth Group Incorporated,UnitedHealth Group Incorporated 16 | ADM,Archer-Daniels-Midland Company,Archer-Daniels-Midland Company 17 | OTIS,Otis Worldwide Corporation,Otis Worldwide Corporation 18 | PKG,Packaging Corporation of Americ,Packaging Corporation of America 19 | SJM,J.M. Smucker Company (The) New,The J. M. Smucker Company 20 | CF,"CF Industries Holdings, Inc.","CF Industries Holdings, Inc." 21 | FBHS,"Fortune Brands Home & Security,","Fortune Brands Home & Security, Inc." 22 | ALB,Albemarle Corporation,Albemarle Corporation 23 | AVB,"AvalonBay Communities, Inc.","AvalonBay Communities, Inc." 24 | SRE,DBA Sempra,Sempra 25 | BSX,Boston Scientific Corporation,Boston Scientific Corporation 26 | CB,Chubb Limited,Chubb Limited 27 | ECL,Ecolab Inc.,Ecolab Inc. 28 | PHM,"PulteGroup, Inc.","PulteGroup, Inc." 29 | NDAQ,"Nasdaq, Inc.","Nasdaq, Inc." 30 | TWTR,"Twitter, Inc.","Twitter, Inc." 31 | EXPE,"Expedia Group, Inc.","Expedia Group, Inc." 32 | DRI,"Darden Restaurants, Inc.","Darden Restaurants, Inc." 33 | TT,Trane Technologies plc,Trane Technologies plc 34 | MMC,"Marsh & McLennan Companies, Inc","Marsh & McLennan Companies, Inc." 35 | CCI,Crown Castle Inc.,Crown Castle Inc. 36 | REGN,"Regeneron Pharmaceuticals, Inc.","Regeneron Pharmaceuticals, Inc." 
37 | STT,State Street Corporation,State Street Corporation 38 | VRTX,Vertex Pharmaceuticals Incorpor,Vertex Pharmaceuticals Incorporated 39 | BMY,Bristol-Myers Squibb Company,Bristol-Myers Squibb Company 40 | AEP,American Electric Power Company,"American Electric Power Company, Inc." 41 | CAT,"Caterpillar, Inc.",Caterpillar Inc. 42 | FCX,"Freeport-McMoRan, Inc.",Freeport-McMoRan Inc. 43 | DVA,DaVita Inc.,DaVita Inc. 44 | LUMN,"Lumen Technologies, Inc.","Lumen Technologies, Inc." 45 | COF,Capital One Financial Corporati,Capital One Financial Corporation 46 | XEL,Xcel Energy Inc.,Xcel Energy Inc. 47 | TEL,TE Connectivity Ltd. New Switze,TE Connectivity Ltd. 48 | PPL,PPL Corporation,PPL Corporation 49 | SCHW,Charles Schwab Corporation (The,The Charles Schwab Corporation 50 | DOW,Dow Inc.,Dow Inc. 51 | IDXX,"IDEXX Laboratories, Inc.","IDEXX Laboratories, Inc." 52 | NUE,Nucor Corporation,Nucor Corporation 53 | STX,Seagate Technology Holdings PLC,Seagate Technology Holdings plc 54 | LHX,"L3Harris Technologies, Inc.","L3Harris Technologies, Inc." 55 | DD,"DuPont de Nemours, Inc.","DuPont de Nemours, Inc." 56 | FMC,FMC Corporation,FMC Corporation 57 | MAR,Marriott International,"Marriott International, Inc." 58 | KR,Kroger Company (The),The Kroger Co. 59 | ACN,Accenture plc,Accenture plc 60 | SEE,Sealed Air Corporation,Sealed Air Corporation 61 | WTW,Willis Towers Watson Public Lim,Willis Towers Watson Public Limited Company 62 | MDLZ,"Mondelez International, Inc.","Mondelez International, Inc." 63 | HBAN,Huntington Bancshares Incorpora,Huntington Bancshares Incorporated 64 | VRSN,"VeriSign, Inc.","VeriSign, Inc." 65 | UNP,Union Pacific Corporation,Union Pacific Corporation 66 | EL,"Estee Lauder Companies, Inc. (T",The Estée Lauder Companies Inc. 67 | PVH,PVH Corp.,PVH Corp. 68 | ULTA,"Ulta Beauty, Inc.","Ulta Beauty, Inc." 69 | PAYC,"Paycom Software, Inc.","Paycom Software, Inc." 70 | FOXA,Fox Corporation,Fox Corporation 71 | RMD,ResMed Inc.,ResMed Inc. 72 | HIG,Hartford Financial Services Gro,"The Hartford Financial Services Group, Inc." 73 | EW,Edwards Lifesciences Corporatio,Edwards Lifesciences Corporation 74 | DG,Dollar General Corporation,Dollar General Corporation 75 | WDC,Western Digital Corporation,Western Digital Corporation 76 | QRVO,"Qorvo, Inc.","Qorvo, Inc." 77 | ETN,"Eaton Corporation, PLC",Eaton Corporation plc 78 | EFX,"Equifax, Inc.",Equifax Inc. 79 | GNRC,Generac Holdlings Inc.,Generac Holdings Inc. 80 | APA,APA Corporation,APA Corporation 81 | BRO,"Brown & Brown, Inc.","Brown & Brown, Inc." 82 | FOX,Fox Corporation,Fox Corporation 83 | ATO,Atmos Energy Corporation,Atmos Energy Corporation 84 | ADBE,Adobe Inc.,Adobe Inc. 85 | XOM,Exxon Mobil Corporation,Exxon Mobil Corporation 86 | PFG,Principal Financial Group Inc,"Principal Financial Group, Inc." 87 | CME,CME Group Inc.,CME Group Inc. 88 | IRM,Iron Mountain Incorporated (Del,Iron Mountain Incorporated 89 | LDOS,"Leidos Holdings, Inc.","Leidos Holdings, Inc." 90 | FITB,Fifth Third Bancorp,Fifth Third Bancorp 91 | ETR,Entergy Corporation,Entergy Corporation 92 | CBRE,CBRE Group Inc,"CBRE Group, Inc." 93 | PSA,Public Storage,Public Storage 94 | NVR,"NVR, Inc.","NVR, Inc." 95 | MSFT,Microsoft Corporation,Microsoft Corporation 96 | COP,ConocoPhillips,ConocoPhillips 97 | CZR,"Caesars Entertainment, Inc.","Caesars Entertainment, Inc." 98 | MO,"Altria Group, Inc.","Altria Group, Inc." 99 | RHI,Robert Half International Inc.,Robert Half International Inc. 100 | ROL,"Rollins, Inc.","Rollins, Inc." 
101 | WYNN,"Wynn Resorts, Limited","Wynn Resorts, Limited" 102 | TXT,Textron Inc.,Textron Inc. 103 | AAPL,Apple Inc.,Apple Inc. 104 | NEM,Newmont Corporation,Newmont Corporation 105 | SEDG,"SolarEdge Technologies, Inc.","SolarEdge Technologies, Inc." 106 | CE,Celanese Corporation Celanese C,Celanese Corporation 107 | EMR,Emerson Electric Company,Emerson Electric Co. 108 | ON,ON Semiconductor Corporation,ON Semiconductor Corporation 109 | MNST,Monster Beverage Corporation,Monster Beverage Corporation 110 | NKE,"Nike, Inc.","NIKE, Inc." 111 | GD,General Dynamics Corporation,General Dynamics Corporation 112 | IT,"Gartner, Inc.","Gartner, Inc." 113 | LRCX,Lam Research Corporation,Lam Research Corporation 114 | GRMN,Garmin Ltd.,Garmin Ltd. 115 | PEP,"Pepsico, Inc.","PepsiCo, Inc." 116 | EXR,Extra Space Storage Inc,Extra Space Storage Inc. 117 | MCO,Moody's Corporation,Moody's Corporation 118 | CBOE,"Cboe Global Markets, Inc.","Cboe Global Markets, Inc." 119 | JCI,Johnson Controls International ,Johnson Controls International plc 120 | VZ,Verizon Communications Inc.,Verizon Communications Inc. 121 | MMM,3M Company,3M Company 122 | ICE,Intercontinental Exchange Inc.,"Intercontinental Exchange, Inc." 123 | FLT,"FleetCor Technologies, Inc.","FLEETCOR Technologies, Inc." 124 | AMT,American Tower Corporation (REI,American Tower Corporation 125 | TRMB,Trimble Inc.,Trimble Inc. 126 | CTLT,"Catalent, Inc.","Catalent, Inc." 127 | DHI,"D.R. Horton, Inc.","D.R. Horton, Inc." 128 | DHR,Danaher Corporation,Danaher Corporation 129 | BWA,BorgWarner Inc.,BorgWarner Inc. 130 | PCAR,PACCAR Inc.,PACCAR Inc 131 | CSCO,"Cisco Systems, Inc.","Cisco Systems, Inc." 132 | O,Realty Income Corporation,Realty Income Corporation 133 | VICI,VICI Properties Inc.,VICI Properties Inc. 134 | TSN,"Tyson Foods, Inc.","Tyson Foods, Inc." 135 | CEG,Constellation Energy Corporatio,Constellation Energy Corporation 136 | JKHY,"Jack Henry & Associates, Inc.","Jack Henry & Associates, Inc." 137 | CTVA,"Corteva, Inc.","Corteva, Inc." 138 | DVN,Devon Energy Corporation,Devon Energy Corporation 139 | MAS,Masco Corporation,Masco Corporation 140 | FANG,"Diamondback Energy, Inc. - Comm","Diamondback Energy, Inc." 141 | MDT,Medtronic plc.,Medtronic plc 142 | CPB,Campbell Soup Company,Campbell Soup Company 143 | PENN,"PENN Entertainment, Inc.","PENN Entertainment, Inc." 144 | STZ,"Constellation Brands, Inc.","Constellation Brands, Inc." 145 | AMZN,"Amazon.com, Inc.","Amazon.com, Inc." 146 | GLW,Corning Incorporated,Corning Incorporated 147 | SNPS,"Synopsys, Inc.","Synopsys, Inc." 148 | MTCH,"Match Group, Inc.","Match Group, Inc." 149 | YUM,"Yum! Brands, Inc.","Yum! Brands, Inc." 150 | BBY,"Best Buy Co., Inc.","Best Buy Co., Inc." 151 | BLK,"BlackRock, Inc.","BlackRock, Inc." 152 | ILMN,"Illumina, Inc.","Illumina, Inc." 153 | AFL,AFLAC Incorporated,Aflac Incorporated 154 | ORCL,Oracle Corporation,Oracle Corporation 155 | GIS,"General Mills, Inc.","General Mills, Inc." 156 | TRV,"The Travelers Companies, Inc.","The Travelers Companies, Inc." 157 | RJF,"Raymond James Financial, Inc.","Raymond James Financial, Inc." 158 | SNA,Snap-On Incorporated,Snap-on Incorporated 159 | GPC,Genuine Parts Company,Genuine Parts Company 160 | WRB,W.R. Berkley Corporation,W. R. Berkley Corporation 161 | ETSY,"Etsy, Inc.","Etsy, Inc." 162 | CMI,Cummins Inc.,Cummins Inc. 163 | EXC,Exelon Corporation,Exelon Corporation 164 | MAA,Mid-America Apartment Communiti,"Mid-America Apartment Communities, Inc." 
165 | MOS,Mosaic Company (The),The Mosaic Company 166 | PLD,"Prologis, Inc.","Prologis, Inc." 167 | AIG,"American International Group, I","American International Group, Inc." 168 | DLR,"Digital Realty Trust, Inc.","Digital Realty Trust, Inc." 169 | CINF,Cincinnati Financial Corporatio,Cincinnati Financial Corporation 170 | MPWR,"Monolithic Power Systems, Inc.","Monolithic Power Systems, Inc." 171 | AAL,"American Airlines Group, Inc.",American Airlines Group Inc. 172 | FDS,FactSet Research Systems Inc.,FactSet Research Systems Inc. 173 | LH,Laboratory Corporation of Ameri,Laboratory Corporation of America Holdings 174 | ABMD,"ABIOMED, Inc.","Abiomed, Inc." 175 | FDX,FedEx Corporation,FedEx Corporation 176 | MTB,M&T Bank Corporation,M&T Bank Corporation 177 | TYL,"Tyler Technologies, Inc.","Tyler Technologies, Inc." 178 | BF-B,Brown Forman Inc,Brown-Forman Corporation 179 | EQR,Equity Residential,Equity Residential 180 | AWK,"American Water Works Company, I","American Water Works Company, Inc." 181 | FTNT,"Fortinet, Inc.","Fortinet, Inc." 182 | WBA,"Walgreens Boots Alliance, Inc.","Walgreens Boots Alliance, Inc." 183 | CMA,Comerica Incorporated,Comerica Incorporated 184 | AEE,Ameren Corporation,Ameren Corporation 185 | FE,FirstEnergy Corp.,FirstEnergy Corp. 186 | AES,The AES Corporation,The AES Corporation 187 | IP,International Paper Company,International Paper Company 188 | PXD,Pioneer Natural Resources Compa,Pioneer Natural Resources Company 189 | OKE,"ONEOK, Inc.","ONEOK, Inc." 190 | QCOM,QUALCOMM Incorporated,QUALCOMM Incorporated 191 | MCK,McKesson Corporation,McKesson Corporation 192 | ROK,"Rockwell Automation, Inc.","Rockwell Automation, Inc." 193 | RE,"Everest Re Group, Ltd.","Everest Re Group, Ltd." 194 | CL,Colgate-Palmolive Company,Colgate-Palmolive Company 195 | HD,"Home Depot, Inc. (The)","The Home Depot, Inc." 196 | AMGN,Amgen Inc.,Amgen Inc. 197 | ED,"Consolidated Edison, Inc.","Consolidated Edison, Inc." 198 | KHC,The Kraft Heinz Company,The Kraft Heinz Company 199 | NRG,"NRG Energy, Inc.","NRG Energy, Inc." 200 | PFE,"Pfizer, Inc.",Pfizer Inc. 201 | PNC,"PNC Financial Services Group, I","The PNC Financial Services Group, Inc." 202 | PKI,"PerkinElmer, Inc.","PerkinElmer, Inc." 203 | ISRG,"Intuitive Surgical, Inc.","Intuitive Surgical, Inc." 204 | BAX,Baxter International Inc.,Baxter International Inc. 205 | DISH,DISH Network Corporation,DISH Network Corporation 206 | D,"Dominion Energy, Inc.","Dominion Energy, Inc." 207 | SBUX,Starbucks Corporation,Starbucks Corporation 208 | XRAY,DENTSPLY SIRONA Inc.,DENTSPLY SIRONA Inc. 209 | CVS,CVS Health Corporation,CVS Health Corporation 210 | CCL,Carnival Corporation,Carnival Corporation & plc 211 | APD,"Air Products and Chemicals, Inc","Air Products and Chemicals, Inc." 212 | BKR,Baker Hughes Company,Baker Hughes Company 213 | GE,General Electric Company,General Electric Company 214 | HUM,Humana Inc.,Humana Inc. 215 | INTU,Intuit Inc.,Intuit Inc. 216 | LW,"Lamb Weston Holdings, Inc.","Lamb Weston Holdings, Inc." 217 | WY,Weyerhaeuser Company,Weyerhaeuser Company 218 | NSC,Norfolk Southern Corporation,Norfolk Southern Corporation 219 | NWL,Newell Brands Inc.,Newell Brands Inc. 220 | DOV,Dover Corporation,Dover Corporation 221 | NLSN,Nielsen N.V.,Nielsen Holdings plc 222 | EQIX,"Equinix, Inc.","Equinix, Inc." 223 | MRNA,"Moderna, Inc.","Moderna, Inc." 224 | ABBV,AbbVie Inc.,AbbVie Inc. 225 | RCL,D/B/A Royal Caribbean Cruises L,Royal Caribbean Cruises Ltd. 226 | PRU,"Prudential Financial, Inc.","Prudential Financial, Inc." 
227 | DFS,Discover Financial Services,Discover Financial Services 228 | HRL,Hormel Foods Corporation,Hormel Foods Corporation 229 | PSX,Phillips 66,Phillips 66 230 | PEAK,"Healthpeak Properties, Inc.","Healthpeak Properties, Inc." 231 | ENPH,"Enphase Energy, Inc.","Enphase Energy, Inc." 232 | WHR,Whirlpool Corporation,Whirlpool Corporation 233 | GWW,"W.W. Grainger, Inc.","W.W. Grainger, Inc." 234 | CDNS,"Cadence Design Systems, Inc.","Cadence Design Systems, Inc." 235 | SYK,Stryker Corporation,Stryker Corporation 236 | RF,Regions Financial Corporation,Regions Financial Corporation 237 | KMX,CarMax Inc,"CarMax, Inc." 238 | GOOG,Alphabet Inc.,Alphabet Inc. 239 | EA,Electronic Arts Inc.,Electronic Arts Inc. 240 | CLX,Clorox Company (The),The Clorox Company 241 | FFIV,"F5, Inc.","F5, Inc." 242 | L,Loews Corporation,Loews Corporation 243 | FRT,Federal Realty Investment Trust,Federal Realty Investment Trust 244 | CHRW,"C.H. Robinson Worldwide, Inc.","C.H. Robinson Worldwide, Inc." 245 | PWR,"Quanta Services, Inc.","Quanta Services, Inc." 246 | AJG,Arthur J. Gallagher & Co.,Arthur J. Gallagher & Co. 247 | BBWI,"Bath & Body Works, Inc.","Bath & Body Works, Inc." 248 | GOOGL,Alphabet Inc.,Alphabet Inc. 249 | LEN,Lennar Corporation,Lennar Corporation 250 | CARR,Carrier Global Corporation,Carrier Global Corporation 251 | ZTS,Zoetis Inc.,Zoetis Inc. 252 | SIVB,SVB Financial Group,SVB Financial Group 253 | TFX,Teleflex Incorporated,Teleflex Incorporated 254 | PNR,Pentair plc.,Pentair plc 255 | JNPR,"Juniper Networks, Inc.","Juniper Networks, Inc." 256 | TAP,Molson Coors Beverage Company,Molson Coors Beverage Company 257 | SYY,Sysco Corporation,Sysco Corporation 258 | HAS,"Hasbro, Inc.","Hasbro, Inc." 259 | VLO,Valero Energy Corporation,Valero Energy Corporation 260 | MCHP,Microchip Technology Incorporat,Microchip Technology Incorporated 261 | GS,"Goldman Sachs Group, Inc. (The)","The Goldman Sachs Group, Inc." 262 | SYF,Synchrony Financial,Synchrony Financial 263 | RL,Ralph Lauren Corporation,Ralph Lauren Corporation 264 | PG,Procter & Gamble Company (The),The Procter & Gamble Company 265 | PGR,Progressive Corporation (The),The Progressive Corporation 266 | MRO,Marathon Oil Corporation,Marathon Oil Corporation 267 | NXPI,NXP Semiconductors N.V.,NXP Semiconductors N.V. 268 | BKNG,Booking Holdings Inc. Common St,Booking Holdings Inc. 269 | PH,Parker-Hannifin Corporation,Parker-Hannifin Corporation 270 | BA,Boeing Company (The),The Boeing Company 271 | ARE,Alexandria Real Estate Equities,"Alexandria Real Estate Equities, Inc." 272 | KLAC,KLA Corporation,KLA Corporation 273 | ALL,Allstate Corporation (The),The Allstate Corporation 274 | LOW,"Lowe's Companies, Inc.","Lowe's Companies, Inc." 275 | ODFL,"Old Dominion Freight Line, Inc.","Old Dominion Freight Line, Inc." 276 | PAYX,"Paychex, Inc.","Paychex, Inc." 277 | OMC,Omnicom Group Inc.,Omnicom Group Inc. 278 | AZO,"AutoZone, Inc.","AutoZone, Inc." 279 | ITW,Illinois Tool Works Inc.,Illinois Tool Works Inc. 280 | ANSS,"ANSYS, Inc.","ANSYS, Inc." 281 | AON,Aon plc,Aon plc 282 | SBNY,Signature Bank,Signature Bank 283 | TSLA,"Tesla, Inc.","Tesla, Inc." 284 | KIM,Kimco Realty Corporation,Kimco Realty Corporation 285 | COO,"The Cooper Companies, Inc.","The Cooper Companies, Inc." 286 | LNC,Lincoln National Corporation,Lincoln National Corporation 287 | SWK,"Stanley Black & Decker, Inc.","Stanley Black & Decker, Inc." 288 | CRL,Charles River Laboratories Inte,"Charles River Laboratories International, Inc." 
289 | HES,Hess Corporation,Hess Corporation 290 | PARA,Paramount Global,Paramount Global 291 | IBM,International Business Machines,International Business Machines Corporation 292 | MET,"MetLife, Inc.","MetLife, Inc." 293 | SBAC,SBA Communications Corporation,SBA Communications Corporation 294 | STE,STERIS plc (Ireland),STERIS plc 295 | TFC,Truist Financial Corporation,Truist Financial Corporation 296 | KMB,Kimberly-Clark Corporation,Kimberly-Clark Corporation 297 | BALL,Ball Corporation,Ball Corporation 298 | KMI,"Kinder Morgan, Inc.","Kinder Morgan, Inc." 299 | CHD,"Church & Dwight Company, Inc.","Church & Dwight Co., Inc." 300 | DE,Deere & Company,Deere & Company 301 | CPT,Camden Property Trust,Camden Property Trust 302 | DRE,Duke Realty Corporation,Duke Realty Corporation 303 | JBHT,"J.B. Hunt Transport Services, I","J.B. Hunt Transport Services, Inc." 304 | ADP,"Automatic Data Processing, Inc.","Automatic Data Processing, Inc." 305 | VFC,V.F. Corporation,V.F. Corporation 306 | ROST,"Ross Stores, Inc.","Ross Stores, Inc." 307 | JPM,JP Morgan Chase & Co.,JPMorgan Chase & Co. 308 | HSY,The Hershey Company,The Hershey Company 309 | PYPL,"PayPal Holdings, Inc.","PayPal Holdings, Inc." 310 | ORLY,"O'Reilly Automotive, Inc.","O'Reilly Automotive, Inc." 311 | TROW,"T. Rowe Price Group, Inc.","T. Rowe Price Group, Inc." 312 | EMN,Eastman Chemical Company,Eastman Chemical Company 313 | AKAM,"Akamai Technologies, Inc.","Akamai Technologies, Inc." 314 | GPN,Global Payments Inc.,Global Payments Inc. 315 | NI,NiSource Inc,NiSource Inc. 316 | HON,Honeywell International Inc.,Honeywell International Inc. 317 | LUV,Southwest Airlines Company,Southwest Airlines Co. 318 | MLM,"Martin Marietta Materials, Inc.","Martin Marietta Materials, Inc." 319 | DGX,Quest Diagnostics Incorporated,Quest Diagnostics Incorporated 320 | AMD,"Advanced Micro Devices, Inc.","Advanced Micro Devices, Inc." 321 | NLOK,NortonLifeLock Inc.,NortonLifeLock Inc. 322 | OGN,Organon & Co.,Organon & Co. 323 | NWS,News Corporation,News Corporation 324 | APH,Amphenol Corporation,Amphenol Corporation 325 | AOS,A.O. Smith Corporation,A. O. Smith Corporation 326 | LYB,LyondellBasell Industries NV,LyondellBasell Industries N.V. 327 | SO,Southern Company (The),The Southern Company 328 | AVGO,Broadcom Inc.,Broadcom Inc. 329 | ALLE,Allegion plc,Allegion plc 330 | HWM,Howmet Aerospace Inc.,Howmet Aerospace Inc. 331 | CTAS,Cintas Corporation,Cintas Corporation 332 | PEG,Public Service Enterprise Group,Public Service Enterprise Group Incorporated 333 | HOLX,"Hologic, Inc.","Hologic, Inc." 334 | VRSK,"Verisk Analytics, Inc.","Verisk Analytics, Inc." 335 | DPZ,Domino's Pizza Inc,"Domino's Pizza, Inc." 336 | WM,"Waste Management, Inc.","Waste Management, Inc." 337 | TMUS,"T-Mobile US, Inc.","T-Mobile US, Inc." 338 | WST,"West Pharmaceutical Services, I","West Pharmaceutical Services, Inc." 339 | EOG,"EOG Resources, Inc.","EOG Resources, Inc." 340 | F,Ford Motor Company,Ford Motor Company 341 | MCD,McDonald's Corporation,McDonald's Corporation 342 | TECH,Bio-Techne Corp,Bio-Techne Corporation 343 | LYV,"Live Nation Entertainment, Inc.","Live Nation Entertainment, Inc." 344 | CDAY,Ceridian HCM Holding Inc.,Ceridian HCM Holding Inc. 345 | HCA,"HCA Healthcare, Inc.","HCA Healthcare, Inc." 346 | PPG,"PPG Industries, Inc.","PPG Industries, Inc." 347 | DTE,DTE Energy Company,DTE Energy Company 348 | HST,Host Hotels,"Host Hotels & Resorts, Inc." 349 | ADI,"Analog Devices, Inc.","Analog Devices, Inc." 
350 | PNW,Pinnacle West Capital Corporati,Pinnacle West Capital Corporation 351 | V,Visa Inc.,Visa Inc. 352 | NFLX,"Netflix, Inc.","Netflix, Inc." 353 | MPC,Marathon Petroleum Corporation,Marathon Petroleum Corporation 354 | EVRG,"Evergy, Inc.","Evergy, Inc." 355 | AME,"AMETEK, Inc.","AMETEK, Inc." 356 | DIS,Walt Disney Company (The),The Walt Disney Company 357 | VMC,Vulcan Materials Company (Holdi,Vulcan Materials Company 358 | MRK,"Merck & Company, Inc.","Merck & Co., Inc." 359 | DUK,Duke Energy Corporation (Holdin,Duke Energy Corporation 360 | NDSN,Nordson Corporation,Nordson Corporation 361 | AAP,Advance Auto Parts Inc.,"Advance Auto Parts, Inc." 362 | CMCSA,Comcast Corporation,Comcast Corporation 363 | INCY,Incyte Corporation,Incyte Corporation 364 | SPG,"Simon Property Group, Inc.","Simon Property Group, Inc." 365 | CDW,CDW Corporation,CDW Corporation 366 | EPAM,"EPAM Systems, Inc.","EPAM Systems, Inc." 367 | DLTR,"Dollar Tree, Inc.","Dollar Tree, Inc." 368 | SWKS,"Skyworks Solutions, Inc.","Skyworks Solutions, Inc." 369 | IPG,"Interpublic Group of Companies,","The Interpublic Group of Companies, Inc." 370 | CRM,"Salesforce, Inc.","Salesforce, Inc." 371 | NOW,"ServiceNow, Inc.","ServiceNow, Inc." 372 | GM,General Motors Company,General Motors Company 373 | APTV,Aptiv PLC,Aptiv PLC 374 | UPS,"United Parcel Service, Inc.","United Parcel Service, Inc." 375 | IFF,International Flavors & Fragran,International Flavors & Fragrances Inc. 376 | CSX,CSX Corporation,CSX Corporation 377 | HLT,Hilton Worldwide Holdings Inc.,Hilton Worldwide Holdings Inc. 378 | WFC,Wells Fargo & Company,Wells Fargo & Company 379 | FTV,Fortive Corporation,Fortive Corporation 380 | RTX,Raytheon Technologies Corporati,Raytheon Technologies Corporation 381 | BDX,"Becton, Dickinson and Company","Becton, Dickinson and Company" 382 | CMS,CMS Energy Corporation,CMS Energy Corporation 383 | ALGN,"Align Technology, Inc.","Align Technology, Inc." 384 | C,"Citigroup, Inc.",Citigroup Inc. 385 | EXPD,Expeditors International of Was,"Expeditors International of Washington, Inc." 386 | J,Jacobs Engineering Group Inc.,Jacobs Engineering Group Inc. 387 | MOH,Molina Healthcare Inc,"Molina Healthcare, Inc." 388 | VTR,"Ventas, Inc.","Ventas, Inc." 389 | DAL,"Delta Air Lines, Inc.","Delta Air Lines, Inc." 390 | JNJ,Johnson & Johnson,Johnson & Johnson 391 | MTD,"Mettler-Toledo International, I",Mettler-Toledo International Inc. 392 | HII,"Huntington Ingalls Industries, ","Huntington Ingalls Industries, Inc." 393 | MU,"Micron Technology, Inc.","Micron Technology, Inc." 394 | WELL,Welltower Inc.,Welltower Inc. 395 | BIIB,Biogen Inc.,Biogen Inc. 396 | ATVI,"Activision Blizzard, Inc","Activision Blizzard, Inc." 397 | TSCO,Tractor Supply Company,Tractor Supply Company 398 | VTRS,Viatris Inc.,Viatris Inc. 399 | URI,"United Rentals, Inc.","United Rentals, Inc." 400 | AVY,Avery Dennison Corporation,Avery Dennison Corporation 401 | FISV,"Fiserv, Inc.","Fiserv, Inc." 402 | NTRS,Northern Trust Corporation,Northern Trust Corporation 403 | MSCI,MSCI Inc,MSCI Inc. 404 | ESS,"Essex Property Trust, Inc.","Essex Property Trust, Inc." 405 | LMT,Lockheed Martin Corporation,Lockheed Martin Corporation 406 | RSG,"Republic Services, Inc.","Republic Services, Inc." 407 | NCLH,Norwegian Cruise Line Holdings ,Norwegian Cruise Line Holdings Ltd. 408 | NEE,"NextEra Energy, Inc.","NextEra Energy, Inc." 409 | GL,Globe Life Inc.,Globe Life Inc. 
410 | WRK,Westrock Company,WestRock Company 411 | LNT,Alliant Energy Corporation,Alliant Energy Corporation 412 | TTWO,"Take-Two Interactive Software, ","Take-Two Interactive Software, Inc." 413 | AMP,"Ameriprise Financial, Inc.","Ameriprise Financial, Inc." 414 | TER,"Teradyne, Inc.","Teradyne, Inc." 415 | HAL,Halliburton Company,Halliburton Company 416 | ZBH,"Zimmer Biomet Holdings, Inc.","Zimmer Biomet Holdings, Inc." 417 | DXC,DXC Technology Company,DXC Technology Company 418 | TGT,Target Corporation,Target Corporation 419 | KDP,Keurig Dr Pepper Inc.,Keurig Dr Pepper Inc. 420 | MGM,MGM Resorts International,MGM Resorts International 421 | CTRA,Coterra Energy Inc.,Coterra Energy Inc. 422 | BIO,"Bio-Rad Laboratories, Inc.","Bio-Rad Laboratories, Inc." 423 | CAH,"Cardinal Health, Inc.","Cardinal Health, Inc." 424 | NOC,Northrop Grumman Corporation,Northrop Grumman Corporation 425 | OXY,Occidental Petroleum Corporatio,Occidental Petroleum Corporation 426 | WAT,Waters Corporation,Waters Corporation 427 | USB,U.S. Bancorp,U.S. Bancorp 428 | KEYS,Keysight Technologies Inc.,"Keysight Technologies, Inc." 429 | BK,The Bank of New York Mellon Cor,The Bank of New York Mellon Corporation 430 | WBD,"Warner Bros. Discovery, Inc. - ","Warner Bros. Discovery, Inc." 431 | K,Kellogg Company,Kellogg Company 432 | CVX,Chevron Corporation,Chevron Corporation 433 | TDG,Transdigm Group Incorporated,TransDigm Group Incorporated 434 | ALK,"Alaska Air Group, Inc.","Alaska Air Group, Inc." 435 | BRK-B,Berkshire Hathaway Inc. New,Berkshire Hathaway Inc. 436 | LKQ,LKQ Corporation,LKQ Corporation 437 | FAST,Fastenal Company,Fastenal Company 438 | PTC,PTC Inc.,PTC Inc. 439 | SHW,Sherwin-Williams Company (The),The Sherwin-Williams Company 440 | INTC,Intel Corporation,Intel Corporation 441 | POOL,Pool Corporation,Pool Corporation 442 | ES,Eversource Energy (D/B/A),Eversource Energy 443 | ABT,Abbott Laboratories,Abbott Laboratories 444 | CPRT,"Copart, Inc.","Copart, Inc." 445 | GILD,"Gilead Sciences, Inc.","Gilead Sciences, Inc." 446 | TXN,Texas Instruments Incorporated,Texas Instruments Incorporated 447 | WMB,"Williams Companies, Inc. (The)","The Williams Companies, Inc." 448 | LLY,Eli Lilly and Company,Eli Lilly and Company 449 | IEX,IDEX Corporation,IDEX Corporation 450 | MHK,"Mohawk Industries, Inc.","Mohawk Industries, Inc." 451 | AXP,American Express Company,American Express Company 452 | NVDA,NVIDIA Corporation,NVIDIA Corporation 453 | TDY,Teledyne Technologies Incorpora,Teledyne Technologies Incorporated 454 | MKTX,"MarketAxess Holdings, Inc.",MarketAxess Holdings Inc. 455 | IR,Ingersoll Rand Inc.,Ingersoll Rand Inc. 456 | AMCR,Amcor plc,Amcor plc 457 | A,"Agilent Technologies, Inc.","Agilent Technologies, Inc." 458 | IQV,"IQVIA Holdings, Inc.",IQVIA Holdings Inc. 459 | IVZ,Invesco Ltd,Invesco Ltd. 460 | CTXS,"Citrix Systems, Inc.","Citrix Systems, Inc." 461 | TMO,Thermo Fisher Scientific Inc,Thermo Fisher Scientific Inc. 462 | KO,Coca-Cola Company (The),The Coca-Cola Company 463 | AIZ,"Assurant, Inc.","Assurant, Inc." 464 | HPE,Hewlett Packard Enterprise Comp,Hewlett Packard Enterprise Company 465 | PM,Philip Morris International Inc,Philip Morris International Inc. 466 | UDR,"UDR, Inc.","UDR, Inc." 467 | MA,Mastercard Incorporated,Mastercard Incorporated 468 | TPR,"Tapestry, Inc.","Tapestry, Inc." 469 | KEY,KeyCorp,KeyCorp 470 | NWSA,News Corporation,News Corporation 471 | NTAP,"NetApp, Inc.","NetApp, Inc." 472 | LIN,Linde plc,Linde plc 473 | TJX,"TJX Companies, Inc. (The)","The TJX Companies, Inc." 
474 | REG,Regency Centers Corporation,Regency Centers Corporation 475 | CTSH,Cognizant Technology Solutions ,Cognizant Technology Solutions Corporation 476 | ELV,"Elevance Health, Inc.",Elevance Health Inc. 477 | MSI,"Motorola Solutions, Inc.","Motorola Solutions, Inc." 478 | UAL,"United Airlines Holdings, Inc.","United Airlines Holdings, Inc." 479 | CAG,"ConAgra Brands, Inc.","Conagra Brands, Inc." 480 | MS,Morgan Stanley,Morgan Stanley 481 | SLB,Schlumberger N.V.,Schlumberger Limited 482 | UHS,"Universal Health Services, Inc.","Universal Health Services, Inc." 483 | ROP,"Roper Technologies, Inc.","Roper Technologies, Inc." 484 | WAB,Westinghouse Air Brake Technolo,Westinghouse Air Brake Technologies Corporation 485 | FIS,Fidelity National Information S,"Fidelity National Information Services, Inc." 486 | EIX,Edison International,Edison International 487 | HPQ,HP Inc.,HP Inc. 488 | HSIC,"Henry Schein, Inc.","Henry Schein, Inc." 489 | ABC,AmerisourceBergen Corporation,AmerisourceBergen Corporation 490 | MKC,"McCormick & Company, Incorporat","McCormick & Company, Incorporated" 491 | WEC,"WEC Energy Group, Inc.","WEC Energy Group, Inc." 492 | ZBRA,Zebra Technologies Corporation,Zebra Technologies Corporation 493 | EBAY,eBay Inc.,eBay Inc. 494 | ANET,"Arista Networks, Inc.","Arista Networks, Inc." 495 | ADSK,"Autodesk, Inc.","Autodesk, Inc." 496 | LVS,Las Vegas Sands Corp.,Las Vegas Sands Corp. 497 | ZION,Zions Bancorporation N.A.,"Zions Bancorporation, National Association" 498 | SPGI,S&P Global Inc.,S&P Global Inc. 499 | BAC,Bank of America Corporation,Bank of America Corporation 500 | WMT,Walmart Inc.,Walmart Inc. 501 | VNO,Vornado Realty Trust,Vornado Realty Trust 502 | CNP,"CenterPoint Energy, Inc (Holdin","CenterPoint Energy, Inc." 503 | AMAT,"Applied Materials, Inc.","Applied Materials, Inc." 504 | CHTR,"Charter Communications, Inc.","Charter Communications, Inc." 505 | --------------------------------------------------------------------------------
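The CSV above is a plain three-column list of ticker rows (symbol, abbreviated name, full company name). As a minimal sketch of how such a file could be consumed, the snippet below loads it with pandas and builds a symbol-to-company-name lookup; the column names (tickers, longNames) and the path ./tickers/nasdaq.csv are assumptions for illustration, not the repository's actual API.

    # Hypothetical helper (not part of the repository): load the ticker CSV shown above.
    # Assumes columns named "tickers" and "longNames" and the path "./tickers/nasdaq.csv".
    import pandas as pd

    def load_tickers(path: str = "./tickers/nasdaq.csv") -> dict[str, str]:
        """Return a mapping of ticker symbol -> full company name."""
        df = pd.read_csv(path)
        # Drop rows without a symbol and strip stray whitespace before building the map.
        df = df.dropna(subset=["tickers"])
        return dict(zip(df["tickers"].str.strip(), df["longNames"]))

    if __name__ == "__main__":
        tickers = load_tickers()
        print(f"{len(tickers)} tickers loaded, e.g. CHTR -> {tickers.get('CHTR')}")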