├── demo └── __init__.py ├── src ├── news │ ├── src │ │ ├── tests │ │ │ ├── __init__.py │ │ │ └── test_mongo_db_client.py │ │ └── finance_news_scraper │ │ │ ├── __init__.py │ │ │ ├── news_sources.py │ │ │ ├── sentiment.py │ │ │ ├── __main__.py │ │ │ └── mongo_client.py │ ├── requirements.txt │ ├── setup.py │ ├── Dockerfile │ ├── README.md │ ├── pyproject.toml │ └── LICENSE └── stocks │ ├── src │ └── finance_stock_scraper │ │ ├── __init__.py │ │ ├── model │ │ ├── __init__.py │ │ ├── Intervals.py │ │ └── Ticker.py │ │ ├── ExecutionContext.py │ │ ├── YFDataProvider.py │ │ ├── TickerRepository.py │ │ ├── __main__.py │ │ ├── QuestClient.py │ │ └── workflow.py │ ├── requirements.txt │ ├── setup.py │ ├── Dockerfile │ ├── README.md │ ├── pyproject.toml │ └── LICENSE ├── .env ├── news └── rss-feeds.json ├── tickers ├── downlaod_nasdaq.py ├── download_eurex.py ├── download_Financial_Markets_UK.py ├── download_helper.py ├── EUREX.csv └── nasdaq.csv ├── .github └── workflows │ ├── publish_docker_test.yml │ ├── publish_on_docker_hub.yml │ ├── python-publish-test.yml │ └── publish_on_pipy.yml ├── LICENSE ├── README.md ├── .gitignore └── docker-compose.yml /demo/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/news/src/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/news/src/finance_news_scraper/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/stocks/src/finance_stock_scraper/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/stocks/src/finance_stock_scraper/model/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/stocks/requirements.txt: -------------------------------------------------------------------------------- 1 | pytz 2 | pandas 3 | yfinance 4 | questdb 5 | requests 6 | pandas-market-calendars 7 | tqdm -------------------------------------------------------------------------------- /src/stocks/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | import os 3 | 4 | setup(version=os.environ.get("PACKAGE_VERSION","DEBUG")) -------------------------------------------------------------------------------- /src/news/requirements.txt: -------------------------------------------------------------------------------- 1 | pytz 2 | pandas 3 | numpy 4 | requests 5 | tqdm 6 | pymongo 7 | transformers[torch] 8 | beautifulsoup4 9 | newspaper3k 10 | dateparser 11 | cchardet 12 | google-news-feed -------------------------------------------------------------------------------- /src/stocks/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10 2 | 3 | ADD requirements.txt /requirements.txt 4 | RUN pip install -r /requirements.txt 5 | 6 | RUN mkdir /var/lib/stock-scraper 7 | RUN mkdir /app 8 | ADD ./src /app 9 | WORKDIR /app 10 | CMD ["python","-m","finance_stock_scraper"] 
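The `setup.py` files read the package version from the `PACKAGE_VERSION` environment variable (the publish workflows further down export it before calling `python -m build`). A minimal local sketch of that same flow, assuming the `build` package is installed; the helper name and the dev version string are illustrative, not part of the repo:

```python
# Hypothetical local build helper mirroring what the publish workflows do:
# export PACKAGE_VERSION, then run `python -m build` inside the package directory.
import os
import subprocess

def build_package(package_dir: str, version: str) -> None:
    env = dict(os.environ, PACKAGE_VERSION=version)
    subprocess.run(["python", "-m", "build"], cwd=package_dir, env=env, check=True)

if __name__ == "__main__":
    build_package("./src/stocks", "0.0.0.dev0")  # illustrative version string
```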
-------------------------------------------------------------------------------- /src/news/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup,find_packages 2 | import os 3 | 4 | setup( 5 | version=os.environ.get("PACKAGE_VERSION","0.0.0"), 6 | package_dir={"":"src"}, 7 | packages=find_packages(where="./src", exclude=("*.tests", "*.tests.*", "tests.*", "tests")) 8 | ) -------------------------------------------------------------------------------- /.env: -------------------------------------------------------------------------------- 1 | INFLUX_DB_USER="admin" 2 | INFLUX_DB_PASSWORD="adminadmin" # PLEASE CHANGE ! 3 | INFLUX_DB_ADMIN_TOKEN="OYRyhNIDCQFe1WMJeJnljPV323EWA3GE45CA1Mpdx5TBbw-pxYqfGlFgAvdtrbKgZcJZnQn7oOhLoRbsUOhnuw==" # PLEASE CHANGE ! 4 | 5 | MONGO_DB_USER="admin" 6 | MONGO_DB_PASDWORD="asda2sdqw12e4asfd" # PLEASE CHANGE! -------------------------------------------------------------------------------- /news/rss-feeds.json: -------------------------------------------------------------------------------- 1 | { 2 | "SeekingAlpha":"https://seekingalpha.com/market_currents.xml", 3 | "CNBC":"http://www.cnbc.com/id/19746125/device/rss/rss.xml", 4 | "Fortune":"https://fortune.com/feed", 5 | "FinancialTimes":"https://www.ft.com/?format=rss", 6 | "Investing.com":"https://www.investing.com/rss/news.rss", 7 | "YahooNews":"https://finance.yahoo.com/news/rssindex" 8 | } -------------------------------------------------------------------------------- /src/news/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10 2 | 3 | ADD requirements.txt /requirements.txt 4 | RUN pip install -r /requirements.txt 5 | 6 | RUN mkdir /var/lib/news-scraper 7 | RUN mkdir /var/lib/news-scraper/tickers 8 | RUN mkdir /var/lib/news-scraper/model 9 | RUN mkdir /var/lib/news-scraper/rss 10 | 11 | RUN mkdir /app 12 | ADD ./src /app 13 | WORKDIR /app 14 | CMD ["python","-m","finance_news_scraper"] 15 | 16 | -------------------------------------------------------------------------------- /tickers/downlaod_nasdaq.py: -------------------------------------------------------------------------------- 1 | from yahoo_fin import stock_info as si 2 | import pandas as pd 3 | from download_helper import get_company_names 4 | 5 | tickers = [] 6 | 7 | tickers += si.tickers_sp500() 8 | tickers += si.tickers_dow() 9 | tickers = list(set(tickers)) 10 | 11 | short_company_names,long_company_names = get_company_names(tickers) 12 | df = pd.DataFrame({'tickers':tickers,'shortNames':short_company_names,'longNames':long_company_names}) 13 | df.to_csv("./tickers/NASDAQ.csv",index=False,header=True) -------------------------------------------------------------------------------- /tickers/download_eurex.py: -------------------------------------------------------------------------------- 1 | from yahoo_fin import stock_info as si 2 | import pandas as pd 3 | from download_helper import get_company_names 4 | 5 | tickers = [] 6 | tickers += list(pd.read_html("https://en.wikipedia.org/wiki/DAX")[3]["Ticker symbol"]) 7 | tickers = list(set(tickers)) 8 | 9 | short_company_names,long_company_names = get_company_names(tickers) 10 | df = pd.DataFrame({'tickers':tickers,'shortNames':short_company_names,'longNames':long_company_names}) 11 | df.to_csv("./tickers/EUREX.csv",index=False,header=True) -------------------------------------------------------------------------------- /tickers/download_Financial_Markets_UK.py: 
-------------------------------------------------------------------------------- 1 | from yahoo_fin import stock_info as si 2 | import pandas as pd 3 | from download_helper import get_company_names 4 | 5 | tickers = [] 6 | 7 | tickers += si.tickers_ftse100() 8 | tickers += si.tickers_ftse250() 9 | tickers = list(set(tickers)) 10 | 11 | short_company_names,long_company_names = get_company_names(tickers) 12 | df = pd.DataFrame({'tickers':tickers,'shortNames':short_company_names,'longNames':long_company_names}) 13 | df.to_csv("./tickers/Financial_Markets_UK.csv",index=False,header=True) -------------------------------------------------------------------------------- /src/stocks/src/finance_stock_scraper/ExecutionContext.py: -------------------------------------------------------------------------------- 1 | from finance_stock_scraper.TickerRepository import TickerRepository 2 | from finance_stock_scraper.YFDataProvider import YFDataProvider 3 | from finance_stock_scraper.QuestClient import QuestClient 4 | 5 | class ExecutionContext(object): 6 | def __init__(self,tickerRepository:TickerRepository,yfDataProcider:YFDataProvider,questClient:QuestClient) -> None: 7 | self.tickerRepository = tickerRepository 8 | self.yfDataProcider = yfDataProcider 9 | self.questClient = questClient -------------------------------------------------------------------------------- /src/stocks/README.md: -------------------------------------------------------------------------------- 1 | # finance-stock-scraper 2 | Periodically collects stock prices from the [Yahoo-Finance-API](https://finance.yahoo.com/) after each market close and stores them in a [QuestDB](https://questdb.io/). 3 | 4 | ## Usage 5 | Install the module with `pip install finance-stock-scraper` and run the script with `python -m finance_stock_scraper`, or use the [docker-image](https://hub.docker.com/repository/docker/llukas22/finance-stock-scraper). 6 | ___ 7 | For an example usage and configuration see [here](https://github.com/LLukas22/Finance-Data-Scraper). -------------------------------------------------------------------------------- /src/news/README.md: -------------------------------------------------------------------------------- 1 | # finance-news-scraper 2 | Periodically collects articles from [Google-News](https://news.google.com/topstories), [FinViz](https://finviz.com/) or RSS feeds and stores them in a [MongoDB](https://www.mongodb.com/). 3 | 4 | ## Usage 5 | Install the module with `pip install finance-news-scraper` and run the script with `python -m finance_news_scraper`, or use the [docker-image](https://hub.docker.com/repository/docker/llukas22/finance-news-scraper). 6 | ___ 7 | For an example usage and configuration see [here](https://github.com/LLukas22/Finance-Data-Scraper).
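Outside of Docker, the scraper is configured through the same environment variables documented in the `docker-compose.yml` at the repository root. A minimal sketch of a one-off local run, assuming the package is installed and a MongoDB instance is reachable; the values shown are illustrative:

```python
# Hypothetical one-off run: set the documented environment variables, then
# invoke the module exactly as the Dockerfile does (`python -m finance_news_scraper`).
import os
import subprocess

env = dict(
    os.environ,
    NEWSSCRAPER_MODE="Single",               # run once instead of on a schedule
    NEWSSCRAPER_MONGODB_HOST="localhost",
    NEWSSCRAPER_MONGODB_PORT="27017",
    NEWSSCRAPER_DOWNLOAD_GOOGLE_NEWS="true",
    NEWSSCRAPER_SENTIMENT_ANALYSIS="false",  # skip the GPU-heavy sentiment step
    # NEWSSCRAPER_MONGODB_USER / NEWSSCRAPER_MONGODB_PASSWORD may also be
    # required, depending on how your MongoDB instance is secured.
)
subprocess.run(["python", "-m", "finance_news_scraper"], env=env, check=True)
```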
-------------------------------------------------------------------------------- /src/stocks/src/finance_stock_scraper/model/Intervals.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | class IntervalTypes(Enum): 4 | Intraday = 0 5 | Daily = 1 6 | 7 | INTERVALS = { 8 | "1m" : IntervalTypes.Intraday, 9 | "2m" : IntervalTypes.Intraday, 10 | "5m" : IntervalTypes.Intraday, 11 | "15m": IntervalTypes.Intraday, 12 | "30m": IntervalTypes.Intraday, 13 | "60m": IntervalTypes.Intraday, 14 | "90m": IntervalTypes.Intraday, 15 | "1h" : IntervalTypes.Intraday, 16 | "1d" : IntervalTypes.Daily, 17 | "5d" : IntervalTypes.Daily, 18 | "1wk": IntervalTypes.Daily, 19 | "1mo": IntervalTypes.Daily, 20 | "3mo": IntervalTypes.Daily, 21 | } -------------------------------------------------------------------------------- /.github/workflows/publish_docker_test.yml: -------------------------------------------------------------------------------- 1 | name: Test Publish to Docker-Hub 2 | 3 | on: workflow_dispatch 4 | 5 | jobs: 6 | docker: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - name: checkout 10 | uses: actions/checkout@v3 11 | 12 | - name: Login to DockerHub 13 | uses: docker/login-action@v2 14 | with: 15 | username: ${{ secrets.DOCKERHUB_USERNAME }} 16 | password: ${{ secrets.DOCKERHUB_TOKEN }} 17 | 18 | - name: Build and push news scraper 19 | uses: docker/build-push-action@v3 20 | with: 21 | context: ./src/news 22 | file: ./src/news/Dockerfile 23 | builder: ${{ steps.buildx.outputs.name }} 24 | push: true 25 | tags: ${{ secrets.DOCKERHUB_USERNAME }}/finance-data-scraper:latest 26 | -------------------------------------------------------------------------------- /tickers/download_helper.py: -------------------------------------------------------------------------------- 1 | from yahooquery import Ticker 2 | from tqdm import tqdm 3 | 4 | def get_company_names(tickers:list[str])->tuple[list[str],list[str]]: 5 | short_company_names = [] 6 | long_company_names = [] 7 | infos = Ticker(tickers).quote_type 8 | for ticker in tqdm(tickers): 9 | info = infos[ticker] 10 | if info: 11 | if "shortName" in info: 12 | short_company_names.append(info["shortName"]) 13 | else: 14 | short_company_names.append("") 15 | 16 | if "longName" in info: 17 | long_company_names.append(info["longName"]) 18 | else: 19 | long_company_names.append("") 20 | else: 21 | short_company_names.append("") 22 | long_company_names.append("") 23 | return short_company_names,long_company_names -------------------------------------------------------------------------------- /src/stocks/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools", "setuptools-scm"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "finance_stock_scraper" 7 | dynamic = ["version"] 8 | authors = [ 9 | { name="Lukas Kreussel"}, 10 | ] 11 | description = "Collect, store and access stock exchange data locally" 12 | readme = "README.md" 13 | license = { file="LICENSE" } 14 | requires-python = ">=3.10" 15 | classifiers = [ 16 | "Programming Language :: Python :: 3", 17 | "License :: OSI Approved :: MIT License", 18 | "Operating System :: OS Independent", 19 | ] 20 | 21 | dependencies=[ 22 | "pytz>=2022.1", 23 | "pandas>=1.4.2", 24 | "yfinance>=0.1.70", 25 | "questdb>=1.0.0", 26 | "requests>=2.28.1", 27 | "pandas-market-calendars>=3.4", 28 | "tqdm>=4.64.0" 29 | ] 30 | 31 | [project.urls] 32 | "Homepage" = 
"https://github.com/LLukas22/Finance-Data-Scraper" 33 | "Bug Tracker" = "https://github.com/LLukas22/Finance-Data-Scraper/issues" -------------------------------------------------------------------------------- /src/stocks/src/finance_stock_scraper/YFDataProvider.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import pandas as pd 3 | import yfinance as yf 4 | from yfinance import shared 5 | 6 | class YFDataProvider(object): 7 | def __init__(self) -> None: 8 | pass 9 | 10 | def get_data(self,tickers:list[str],start_date:datetime.datetime,end_date:datetime.datetime,interval:str)->tuple[pd.DataFrame,dict]: 11 | if len(tickers) < 1: 12 | return None 13 | 14 | ticker_string = " ".join(tickers) 15 | data = yf.download(ticker_string, start_date, end_date, interval=interval, threads=True, group_by = 'ticker', progress=True) 16 | return data,shared._ERRORS 17 | 18 | def get_data_from_period(self,tickers:list[str],interval:str,period:str="max")->tuple[pd.DataFrame,dict]: 19 | if len(tickers) < 1: 20 | return None 21 | 22 | ticker_string = " ".join(tickers) 23 | data = yf.download(ticker_string, period = period, interval=interval, threads=True, group_by = 'ticker', progress=True) 24 | return data,shared._ERRORS -------------------------------------------------------------------------------- /src/news/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools", "setuptools-scm"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "finance_news_scraper" 7 | dynamic = ["version"] 8 | authors = [ 9 | { name="Lukas Kreussel"}, 10 | ] 11 | description = "Collect, store and access finance news locally" 12 | readme = "README.md" 13 | license = { file="LICENSE" } 14 | requires-python = ">=3.10" 15 | classifiers = [ 16 | "Programming Language :: Python :: 3", 17 | "License :: OSI Approved :: MIT License", 18 | "Operating System :: OS Independent", 19 | ] 20 | 21 | dependencies=[ 22 | "pytz>=2022.1", 23 | "pandas>=1.4.2", 24 | "numpy>=1.22.3", 25 | "requests>=2.28.1", 26 | "tqdm>=4.64.0", 27 | "pymongo>=4.2.0", 28 | "transformers[torch]>=4.19.4", 29 | "beautifulsoup4>=4.11.1", 30 | "newspaper3k>=0.2.8", 31 | "dateparser>=1.1.1", 32 | "cchardet>=2.1.7", 33 | "google-news-feed>=1.0.0" 34 | ] 35 | 36 | [project.urls] 37 | "Homepage" = "https://github.com/LLukas22/Finance-Data-Scraper" 38 | "Bug Tracker" = "https://github.com/LLukas22/Finance-Data-Scraper/issues" -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 LLukas22 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/news/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 LLukas22 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/stocks/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 LLukas22 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /src/stocks/src/finance_stock_scraper/model/Ticker.py: -------------------------------------------------------------------------------- 1 | import pandas_market_calendars as mcal 2 | from pandas_market_calendars import MarketCalendar 3 | import pandas as pd 4 | import datetime 5 | 6 | class Ticker(object): 7 | trading_times:MarketCalendar 8 | 9 | def __init__(self,ticker:str,exchange:str): 10 | self.ticker = ticker.upper() 11 | self.exchange = exchange.upper() 12 | self.trading_times = None 13 | 14 | def _init_calendar(self)->None: 15 | self.trading_times = mcal.get_calendar(self.exchange) 16 | 17 | def get_trading_times(self,start_date:datetime.date,end_date:datetime.date)->pd.DataFrame: 18 | if self.trading_times is None: 19 | self._init_calendar() 20 | return self.trading_times.schedule(start_date=start_date, end_date=end_date) 21 | 22 | def is_in_trading_times(self,date:datetime.date)->bool: 23 | schedule = self.get_trading_times(date,date) 24 | if len(schedule) == 0: 25 | return False 26 | return date == schedule.index[0].date() 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /.github/workflows/publish_on_docker_hub.yml: -------------------------------------------------------------------------------- 1 | name: Publish to Docker-Hub 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | jobs: 8 | docker: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - name: checkout 12 | uses: actions/checkout@v3 13 | 14 | - id: get_version 15 | uses: battila7/get-version-action@v2 16 | 17 | - name: print version 18 | run: echo ${{ steps.get_version.outputs.version-without-v }} 19 | 20 | - name: Login to DockerHub 21 | uses: docker/login-action@v2 22 | with: 23 | username: ${{ secrets.DOCKERHUB_USERNAME }} 24 | password: ${{ secrets.DOCKERHUB_TOKEN }} 25 | 26 | - name: Build and push news scraper 27 | uses: docker/build-push-action@v3 28 | with: 29 | context: ./src/news 30 | file: ./src/news/Dockerfile 31 | builder: ${{ steps.buildx.outputs.name }} 32 | push: true 33 | tags: ${{ secrets.DOCKERHUB_USERNAME }}/finance-news-scraper:${{ steps.get_version.outputs.version-without-v }} 34 | 35 | - name: Build and push stock scraper 36 | uses: docker/build-push-action@v3 37 | with: 38 | context: ./src/stocks 39 | file: ./src/stocks/Dockerfile 40 | builder: ${{ steps.buildx.outputs.name }} 41 | push: true 42 | tags: ${{ secrets.DOCKERHUB_USERNAME }}/finance-stock-scraper:${{ steps.get_version.outputs.version-without-v }} 43 | -------------------------------------------------------------------------------- /.github/workflows/python-publish-test.yml: -------------------------------------------------------------------------------- 1 | # This workflow will upload a Python Package using Twine when a release is created 2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries 3 | 4 | # This workflow uses actions that are not certified by GitHub. 5 | # They are provided by a third-party and are governed by 6 | # separate terms of service, privacy policy, and support 7 | # documentation. 
8 | 9 | name: Upload Python Package to Test PyPi 10 | 11 | on: workflow_dispatch 12 | 13 | env: 14 | GLOBAL_PACKAGE_VERSION: 0.0.2 15 | 16 | permissions: 17 | contents: read 18 | 19 | jobs: 20 | deploy: 21 | 22 | runs-on: ubuntu-latest 23 | 24 | steps: 25 | - name: update env 26 | run: echo "PACKAGE_VERSION=$GLOBAL_PACKAGE_VERSION" >> $GITHUB_ENV 27 | 28 | - name: checkout 29 | uses: actions/checkout@v3 30 | 31 | - name: Set up Python 32 | uses: actions/setup-python@v3 33 | with: 34 | python-version: '3.10' 35 | - name: Install dependencies 36 | run: | 37 | python -m pip install --upgrade pip 38 | pip install build 39 | 40 | - name: Build news_scraper 41 | working-directory: ./src/news 42 | run: python -m build 43 | - name: Publish news_scraper 44 | uses: pypa/gh-action-pypi-publish@v1.5.1 45 | with: 46 | user: __token__ 47 | password: ${{ secrets.PYPI_TEST_TOKEN }} 48 | repository_url: https://test.pypi.org/legacy/ 49 | packages_dir: ./src/news/dist/ 50 | 51 | - name: Build stock_scraper 52 | working-directory: ./src/stocks 53 | run: python -m build 54 | - name: Publish stock_scraper 55 | uses: pypa/gh-action-pypi-publish@v1.5.1 56 | with: 57 | user: __token__ 58 | password: ${{ secrets.PYPI_TEST_TOKEN }} 59 | repository_url: https://test.pypi.org/legacy/ 60 | packages_dir: ./src/stocks/dist/ 61 | -------------------------------------------------------------------------------- /tickers/EUREX.csv: -------------------------------------------------------------------------------- 1 | tickers,shortNames,longNames 2 | BAS.DE,BASF SE,BASF SE 3 | BNR.DE,BRENNTAG SE NA O.N.,Brenntag SE 4 | DTG.DE,DAIMLER TRUCK HOLD,Daimler Truck Holding AG 5 | 1COV.DE,COVESTRO AG,Covestro AG 6 | PAH3.DE,PORSCHE AUTOM.HLDG VZO,Porsche Automobil Holding SE 7 | HNR1.DE,HANNOVER RUECK SE NA O.N.,Hannover Rück SE 8 | DBK.DE,DEUTSCHE BANK AG,Deutsche Bank Aktiengesellschaft 9 | QIA.DE,"QIAGEN NV EO -,01",Qiagen N.V. 10 | ZAL.DE,ZALANDO SE,Zalando SE 11 | DPW.DE,DEUTSCHE POST AG,Deutsche Post AG 12 | VOW3.DE,VOLKSWAGEN AG,Volkswagen AG 13 | MBG.DE,MERCEDES-BENZ GROUP,Mercedes-Benz Group AG 14 | SIE.DE,SIEMENS AG,Siemens Aktiengesellschaft 15 | ADS.DE,ADIDAS AG,adidas AG 16 | EOAN.DE,E.ON SE,E.ON SE 17 | MRK.DE,MERCK KGAA,MERCK Kommanditgesellschaft auf Aktien 18 | SHL.DE,SIEMENS HEALTH.AG NA O.N.,Siemens Healthineers AG 19 | ALV.DE,ALLIANZ SE,Allianz SE 20 | DTE.DE,DEUTSCHE TELEKOM AG,Deutsche Telekom AG 21 | CON.DE,CONTINENTAL AG,Continental Aktiengesellschaft 22 | HFG.DE,HELLOFRESH SE INH O.N.,HelloFresh SE 23 | FRE.DE,FRESENIUS SE&CO KGAA,Fresenius SE & Co. KGaA 24 | IFX.DE,INFINEON TECHNOLOGIES AG,Infineon Technologies AG 25 | LIN.DE,"LINDE PLC EO 0,001",Linde plc 26 | FME.DE,FRESENIUS MEDICAL CARE AG & CO ,Fresenius Medical Care AG & Co. KGaA 27 | MUV2.DE,MUENCHENER RUECKVERSICHERUNGS A,Münchener Rückversicherungs-Gesellschaft Aktiengesellschaft in München 28 | SY1.DE,SYMRISE AG INH. O.N.,Symrise AG 29 | RWE.DE,RWE AG INH O.N.,RWE Aktiengesellschaft 30 | SAP.DE,SAP SE,SAP SE 31 | AIR.DE,AIRBUS SE,Airbus SE 32 | HEI.DE,HEIDELBERGCEMENT AG O.N.,HeidelbergCement AG 33 | MTX.DE,MTU AERO ENGINES NA O.N.,MTU Aero Engines AG 34 | PUM.DE,PUMA SE,PUMA SE 35 | BEI.DE,BEIERSDORF AG O.N.,Beiersdorf Aktiengesellschaft 36 | SRT3.DE,SARTORIUS AG VZO O.N.,Sartorius Aktiengesellschaft 37 | DB1.DE,DEUTSCHE BOERSE NA O.N.,Deutsche Börse AG 38 | BAYN.DE,BAYER AG,Bayer Aktiengesellschaft 39 | VNA.DE,VONOVIA SE NA O.N.,Vonovia SE 40 | HEN3.DE,HENKEL AG&CO. KGAA,Henkel AG & Co. 
KGaA 41 | BMW.DE,BAYERISCHE MOTOREN WERKE AG,Bayerische Motoren Werke Aktiengesellschaft 42 | -------------------------------------------------------------------------------- /.github/workflows/publish_on_pipy.yml: -------------------------------------------------------------------------------- 1 | # This workflow will upload a Python Package using Twine when a release is created 2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries 3 | 4 | # This workflow uses actions that are not certified by GitHub. 5 | # They are provided by a third-party and are governed by 6 | # separate terms of service, privacy policy, and support 7 | # documentation. 8 | 9 | name: Upload Python Package to PyPi 10 | 11 | on: 12 | release: 13 | types: [published] 14 | 15 | permissions: 16 | contents: read 17 | 18 | jobs: 19 | deploy: 20 | 21 | runs-on: ubuntu-latest 22 | 23 | steps: 24 | - name: checkout 25 | uses: actions/checkout@v3 26 | 27 | - id: get_version 28 | uses: battila7/get-version-action@v2 29 | 30 | - name: print version 31 | run: echo ${{ steps.get_version.outputs.version-without-v }} 32 | 33 | - name: update env 34 | run: echo "PACKAGE_VERSION=${{ steps.get_version.outputs.version-without-v }}" >> $GITHUB_ENV 35 | 36 | - name: print package version 37 | run: echo $PACKAGE_VERSION 38 | 39 | - name: Set up Python 40 | uses: actions/setup-python@v3 41 | with: 42 | python-version: '3.10' 43 | - name: Install dependencies 44 | run: | 45 | python -m pip install --upgrade pip 46 | pip install build 47 | 48 | - name: Build news_scraper 49 | working-directory: ./src/news 50 | run: python -m build 51 | - name: Publish news_scraper 52 | uses: pypa/gh-action-pypi-publish@v1.5.1 53 | with: 54 | user: __token__ 55 | password: ${{ secrets.PYPI_TOKEN }} 56 | packages_dir: ./src/news/dist/ 57 | 58 | - name: Build stock_scraper 59 | working-directory: ./src/stocks 60 | run: python -m build 61 | - name: Publish stock_scraper 62 | uses: pypa/gh-action-pypi-publish@v1.5.1 63 | with: 64 | user: __token__ 65 | password: ${{ secrets.PYPI_TOKEN }} 66 | packages_dir: ./src/stocks/dist/ 67 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Finance-Data-Scraper 2 | Packages to collect Stock and News data periodically from different sources and save it to databases. 3 | 4 | 5 | Also includes python packages to easily access and consume the data. 6 | 7 | ## Finance-Stock-Scraper 8 | Collects data from the [Yahoo-Finance API](https://finance.yahoo.com/) and saves it to a [QuestDB](https://questdb.io/) instance. 9 | 10 | ### Server 11 | Use the [docker-image](https://hub.docker.com/repository/docker/llukas22/finance-stock-scraper) to run the server. The server will collect data after each trading day. The tickers are provided via *.csv files that contain the ticker, name and long-name of the company. The files must be named after the exchange the tickers are listed on. An example can be found [here](tickers/NASDAQ.csv). 12 | 13 | An example config can be found in the [docker-compose.yml](docker-compose.yml) file. 14 | 15 | 16 | ### Consume the Data 17 | To consume the data, use the [pip package](https://pypi.org/project/finance-stock-scraper/). 18 | 19 | ``` 20 | pip install finance-stock-scraper 21 | ``` 22 | 23 | Then use the TickerRepository to get the data as a pandas DataFrame. 
24 | 25 | ```python 26 | from finance_stock_scraper.QuestClient import QuestClient 27 | from finance_stock_scraper.TickerRepository import TickerRepository 28 | from finance_stock_scraper.model.Ticker import Ticker 29 | 30 | questClient = QuestClient(host=IP) 31 | tickerRepository = TickerRepository(questClient) 32 | 33 | #Build a ticker object 34 | ticker = Ticker("GOOGL","NASDAQ") 35 | #Get data from QuestDB 36 | df_daily = tickerRepository.get_values(tickers=ticker,interval="1d",values=["open","close","high","low","volume"]) 37 | df_minutely = tickerRepository.get_values(tickers=ticker,interval="5m",values=["open","close","high","low","volume"],start_time=START_TIME,end_time=END_TIME) 38 | ``` 39 | 40 | ## Finance-News-Scraper 41 | Scrapes news articles and saves them to a [MongoDB](https://www.mongodb.com/) instance. 42 | ### Server 43 | Use the [docker-image](https://hub.docker.com/repository/docker/llukas22/finance-news-scraper) to run the server. The server will collect articles from [Google-News](https://news.google.com/topstories), [FinViz](https://finviz.com/) or RSS feeds. A sentiment analysis is then performed and the articles are saved to the database. 44 | 45 | 46 | An example config can be found in the [docker-compose.yml](docker-compose.yml) file. 47 | ### Consume the Data 48 | To consume the data, use the [pip package](https://pypi.org/project/finance-news-scraper/). 49 | 50 | ``` 51 | pip install finance-news-scraper 52 | ``` 53 | Then use the MongoDBClient to get the data as a pandas DataFrame. 54 | 55 | ```python 56 | from finance_news_scraper.mongo_client import MongoDBClient 57 | 58 | mongoClient = MongoDBClient(host=IP) 59 | 60 | articles = mongoClient.get_articles(["GOOGL"]) 61 | sentiments = mongoClient.get_sentiments("GOOGL",frequency="h",start=START_TIME,end=END_TIME) 62 | ``` 63 | 64 | ## Examples 65 | For more examples, see the [demo](demo/demo.ipynb) notebook.
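As a quick illustration, the two snippets above can also be combined. A minimal sketch, assuming both scrapers have been running for a while and that `IP`, `START_TIME` and `END_TIME` are defined as in the snippets above; the exact sentiment column names and timestamp granularity depend on the stored schema, so a resample may be needed before joining:

```python
from finance_stock_scraper.QuestClient import QuestClient
from finance_stock_scraper.TickerRepository import TickerRepository
from finance_stock_scraper.model.Ticker import Ticker
from finance_news_scraper.mongo_client import MongoDBClient

# Daily closing prices from QuestDB ...
prices = TickerRepository(QuestClient(host=IP)).get_values(
    tickers=Ticker("GOOGL", "NASDAQ"), interval="1d", values=["close"]
)
# ... and hourly sentiment scores from MongoDB for the same ticker
sentiments = MongoDBClient(host=IP).get_sentiments(
    "GOOGL", frequency="h", start=START_TIME, end=END_TIME
)

# Align both frames on their timestamp index
combined = prices.join(sentiments, how="inner", rsuffix="_sentiment")
print(combined.head())
```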
-------------------------------------------------------------------------------- /src/news/src/finance_news_scraper/news_sources.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import hashlib 3 | from bs4 import BeautifulSoup 4 | import requests 5 | from dateutil.parser import parse 6 | import pytz 7 | from google_news_feed import GoogleNewsFeed 8 | from datetime import datetime, timedelta 9 | from tqdm import tqdm 10 | import time 11 | import random 12 | import urllib.parse 13 | 14 | USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:103.0) Gecko/20100101 Firefox/103.0' 15 | 16 | class News_Item(object): 17 | def __init__(self,publisher:str,link:str,tickers:list[str],pub_date:datetime) -> None: 18 | self.publisher = publisher.upper() 19 | self.link = link 20 | self.pub_date = pub_date 21 | self.tickers = tickers 22 | self.__hash = None 23 | 24 | @property 25 | def hash(self)->str: 26 | if self.__hash: 27 | return self.__hash 28 | else: 29 | self.__hash = hashlib.sha512((self.link).encode("UTF-8")).hexdigest() 30 | return self.__hash 31 | 32 | def get_rss_items(rss_url:str,publisher:str) -> list[News_Item]: 33 | news_items = [] 34 | r = requests.get(rss_url) 35 | webpage = r.content 36 | soup = BeautifulSoup(webpage, features='xml') 37 | items = soup.find_all('item') 38 | for item in items: 39 | link = item.find('link').text 40 | dt = parse(item.find('pubDate').text).astimezone(pytz.UTC) 41 | #Some sites give us the tickers of the article 42 | tickers = None 43 | if item.find('category'): 44 | categories = item.find_all('category') 45 | tickers = [category.text for category in categories] 46 | news_items.append(News_Item(publisher,link,tickers,dt)) 47 | return news_items 48 | 49 | def get_google_news_items(tickers:list[tuple[str,str,str]]) -> list[News_Item]: 50 | news_items = [] 51 | gnf = GoogleNewsFeed() 52 | for ticker,shortname,longname in tqdm(tickers,desc="Google News"): 53 | try: 54 | time.sleep(random.uniform(0.5,1.0)) # try to avoid being rate limited 55 | results = gnf.query(f"{ticker} OR {shortname} OR {longname}",when="2w") 56 | for result in results: 57 | news_items.append(News_Item(result.source,result.link,[ticker],result.pubDate)) 58 | except Exception as e: 59 | logging.error(e) 60 | return news_items 61 | 62 | def get_finviz_news_items(tickers:list[tuple[str,str,str]]) -> list[News_Item]: 63 | news_items = [] 64 | finwiz_url = 'https://finviz.com/quote.ashx?t=' 65 | for ticker,shortname,longname in tqdm(tickers,desc="FinViz News"): 66 | try: 67 | time.sleep(random.uniform(0.5,1.0)) # try to avoid being rate limited 68 | url = finwiz_url + ticker.lower() 69 | result = requests.get(url,headers = {'User-Agent': USER_AGENT}) 70 | if result.status_code == 200: 71 | html = BeautifulSoup(result.content, features='html.parser') 72 | news_table = html.find(id='news-table') 73 | 74 | for x in news_table.findAll('tr'): 75 | link = x.a.attrs['href'] 76 | publisher = x.span.get_text().strip() 77 | 78 | date_scrape = x.td.text.split() 79 | datetime = parse(" ".join(date_scrape)).astimezone(pytz.UTC) 80 | news_items.append(News_Item(publisher,link,[ticker],datetime)) 81 | except Exception as e: 82 | logging.error(e) 83 | return news_items 84 | 85 | 86 | 87 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /storage 2 | /.vscode 3 | /worker 4 | /sentiment_model 5 | 6 | # Byte-compiled / 
optimized / DLL files 7 | __pycache__/ 8 | *.py[cod] 9 | *$py.class 10 | 11 | # C extensions 12 | *.so 13 | 14 | # Distribution / packaging 15 | .Python 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | downloads/ 20 | eggs/ 21 | .eggs/ 22 | lib/ 23 | lib64/ 24 | parts/ 25 | sdist/ 26 | var/ 27 | wheels/ 28 | share/python-wheels/ 29 | *.egg-info/ 30 | .installed.cfg 31 | *.egg 32 | MANIFEST 33 | 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .nox/ 48 | .coverage 49 | .coverage.* 50 | .cache 51 | nosetests.xml 52 | coverage.xml 53 | *.cover 54 | *.py,cover 55 | .hypothesis/ 56 | .pytest_cache/ 57 | cover/ 58 | 59 | # Translations 60 | *.mo 61 | *.pot 62 | 63 | # Django stuff: 64 | *.log 65 | local_settings.py 66 | db.sqlite3 67 | db.sqlite3-journal 68 | 69 | # Flask stuff: 70 | instance/ 71 | .webassets-cache 72 | 73 | # Scrapy stuff: 74 | .scrapy 75 | 76 | # Sphinx documentation 77 | docs/_build/ 78 | 79 | # PyBuilder 80 | .pybuilder/ 81 | target/ 82 | 83 | # Jupyter Notebook 84 | .ipynb_checkpoints 85 | 86 | # IPython 87 | profile_default/ 88 | ipython_config.py 89 | 90 | # pyenv 91 | # For a library or package, you might want to ignore these files since the code is 92 | # intended to run in multiple environments; otherwise, check them in: 93 | # .python-version 94 | 95 | # pipenv 96 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 97 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 98 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 99 | # install all needed dependencies. 100 | #Pipfile.lock 101 | 102 | # poetry 103 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 104 | # This is especially recommended for binary packages to ensure reproducibility, and is more 105 | # commonly ignored for libraries. 106 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 107 | #poetry.lock 108 | 109 | # pdm 110 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 111 | #pdm.lock 112 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 113 | # in version control. 114 | # https://pdm.fming.dev/#use-with-ide 115 | .pdm.toml 116 | 117 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 118 | __pypackages__/ 119 | 120 | # Celery stuff 121 | celerybeat-schedule 122 | celerybeat.pid 123 | 124 | # SageMath parsed files 125 | *.sage.py 126 | 127 | # Environments 128 | .venv 129 | env/ 130 | venv/ 131 | ENV/ 132 | env.bak/ 133 | venv.bak/ 134 | 135 | # Spyder project settings 136 | .spyderproject 137 | .spyproject 138 | 139 | # Rope project settings 140 | .ropeproject 141 | 142 | # mkdocs documentation 143 | /site 144 | 145 | # mypy 146 | .mypy_cache/ 147 | .dmypy.json 148 | dmypy.json 149 | 150 | # Pyre type checker 151 | .pyre/ 152 | 153 | # pytype static type analyzer 154 | .pytype/ 155 | 156 | # Cython debug symbols 157 | cython_debug/ 158 | 159 | # PyCharm 160 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 161 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 162 | # and can be added to the global gitignore or merged into this file. For a more nuclear 163 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 164 | #.idea/ 165 | -------------------------------------------------------------------------------- /src/stocks/src/finance_stock_scraper/TickerRepository.py: -------------------------------------------------------------------------------- 1 | from finance_stock_scraper.QuestClient import QuestClient 2 | from finance_stock_scraper.model.Ticker import Ticker 3 | import os 4 | import pandas as pd 5 | from datetime import datetime 6 | import logging 7 | 8 | class TickerRepository(object): 9 | exchanges:dict[str,dict[str,Ticker]] 10 | 11 | def __init__(self,quest_client:QuestClient) -> None: 12 | self.exchanges = {} 13 | self.quest_client = quest_client 14 | 15 | def load_tickers(self,directory:str)->None: 16 | """ 17 | Load tickers from *.csv files in a directory where the filename is the exchange and the tickers are the rows in the file. 
18 | """ 19 | files = [file for file in os.listdir(directory) if file.endswith(".csv")] 20 | if len(files) == 0: 21 | logging.error("Found no *.csv files in the Tickers Directory!") 22 | 23 | logging.debug(f"Found files: {','.join(files)}") 24 | 25 | for file in files: 26 | file = os.path.join(directory,file) 27 | exchange = os.path.basename(file).split('.')[0].upper() 28 | tickers = pd.read_csv(file) 29 | for ticker in tickers.values: 30 | if ticker[0] is not None and isinstance(ticker[0],str): 31 | self.add_ticker(Ticker(ticker[0],exchange)) 32 | 33 | def add_ticker(self,ticker:Ticker)->None: 34 | if ticker.exchange not in self.exchanges: 35 | self.exchanges[ticker.exchange] = {} 36 | self.exchanges[ticker.exchange][ticker.ticker] = ticker 37 | 38 | 39 | def get_ticker(self,ticker:str)->Ticker|None: 40 | found_ticker = None 41 | ticker = ticker.upper() 42 | for exchage in self.exchanges: 43 | if ticker in self.exchanges[exchage]: 44 | found_ticker = self.exchanges[exchage][ticker] 45 | break 46 | return found_ticker 47 | 48 | 49 | def _get_single_value(self,ticker:Ticker,interval:str,values:list[str]=["close"],start_time:datetime|None=None,end_time:datetime|None=None)->pd.DataFrame|None: 50 | df = None 51 | result = self.quest_client.get_data(ticker,interval,values,start_time,end_time) 52 | if result: 53 | data = {} 54 | for i,column in enumerate(values): 55 | data[column] = [entry[i+1] for entry in result['dataset']] 56 | index = [datetime.strptime(entry[0] ,"%Y-%m-%dT%H:%M:%S.%fZ") for entry in result['dataset']] 57 | df = pd.DataFrame(data=data,index=index) 58 | return df 59 | 60 | def _get_multiple_values(self,tickers:list[Ticker],interval:str,values:list[str]=["close"],start_time:datetime|None=None,end_time:datetime|None=None)->dict[str,pd.DataFrame]: 61 | dataframes = {} 62 | for ticker in tickers: 63 | df = self._get_single_value(ticker,interval,values,start_time,end_time) 64 | if df is not None: 65 | dataframes[ticker.ticker] = df 66 | return dataframes 67 | 68 | def get_values(self,tickers:list[Ticker]|Ticker,interval:str,values:list[str]=["close"],start_time:datetime|None=None,end_time:datetime|None=None)->pd.DataFrame|dict[str,pd.DataFrame]|None: 69 | if isinstance(tickers,list): 70 | return self._get_multiple_values(tickers,interval,values,start_time,end_time) 71 | else: 72 | return self._get_single_value(tickers,interval,values,start_time,end_time) 73 | 74 | def remove(self,ticker:str)->bool: 75 | ticker = ticker.upper() 76 | for exchage in self.exchanges: 77 | if ticker in self.exchanges[exchage]: 78 | self.exchanges[exchage].pop(ticker) 79 | return True 80 | return False 81 | 82 | 83 | if __name__ == "__main__": 84 | questClient = QuestClient() 85 | 86 | repo = TickerRepository(questClient) 87 | 88 | ticker1 = Ticker("A","NASDAQ") 89 | ticker2 = Ticker("GOOGL","NASDAQ") 90 | repo.add_ticker(ticker1) 91 | repo.add_ticker(ticker2) 92 | data = repo.get_values([ticker1,ticker2],"1d",values=["close","volume"]) 93 | print(data) 94 | 95 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.9" 2 | 3 | services: 4 | questDB: 5 | container_name: questDB 6 | image: questdb/questdb:latest 7 | profiles: 8 | - database 9 | - stock-scraper 10 | - all 11 | ports: 12 | - "9000:9000" 13 | - "9003:9003" 14 | - "9009:9009" 15 | restart: 16 | unless-stopped 17 | volumes: 18 | - ./storage/questdb:/var/lib/questdb 19 | 20 | mongoDB: 21 | image: mongo 
22 | container_name: mongodb 23 | profiles: 24 | - database 25 | - mongoDB 26 | - all 27 | volumes: 28 | - ./storage/mongodb:/data/db 29 | ports: 30 | - "27017:27017" 31 | restart: 32 | unless-stopped 33 | environment: 34 | MONGO_INITDB_ROOT_USERNAME: ${MONGO_DB_USER} 35 | MONGO_INITDB_ROOT_PASSWORD: ${MONGO_DB_PASDWORD} 36 | networks: 37 | - mongo-network 38 | 39 | mongo-express: 40 | image: mongo-express 41 | container_name: mongo-express 42 | profiles: 43 | - database 44 | - mongoDB 45 | - all 46 | ports: 47 | - "8081:8081" 48 | environment: 49 | ME_CONFIG_MONGODB_ADMINUSERNAME: ${MONGO_DB_USER} 50 | ME_CONFIG_MONGODB_ADMINPASSWORD: ${MONGO_DB_PASDWORD} 51 | ME_CONFIG_MONGODB_URL: mongodb://${MONGO_DB_USER}:${MONGO_DB_PASDWORD}@mongoDB:27017 52 | restart: 53 | unless-stopped 54 | depends_on: 55 | - mongoDB 56 | networks: 57 | - mongo-network 58 | 59 | stock-scraper: 60 | container_name: StockScraper 61 | #build: ./src/stocks/ 62 | image: llukas22/finance-stock-scraper:0.0.4 63 | profiles: 64 | - all 65 | - stock-scraper 66 | tty: true 67 | environment: 68 | - STOCKSCRAPER_QUESTDB_HOST=questdb 69 | - STOCKSCRAPER_QUESTDB_ILP_PORT=9009 #line protocol port 70 | - STOCKSCRAPER_QUESTDB_MONITORING_PORT=9003 #Port for health check 71 | - STOCKSCRAPER_QUESTDB_PORT=9000 #rest port 72 | - STOCKSCRAPER_MODE=Scheduled #Single or Scheduled 73 | - STOCKSCRAPER_TICKERS_DIR=/var/lib/stock-scraper 74 | - STOCKSCRAPER_SLEEPTIME=3600 #1hour in seconds 75 | - STOCKSCRAPER_DEBUG=True #activate debug mode 76 | volumes: 77 | - ./tickers:/var/lib/stock-scraper 78 | restart: 79 | unless-stopped 80 | depends_on: 81 | - questDB 82 | 83 | news-scraper: 84 | container_name: NewsScraper 85 | #build: ./src/news/ 86 | image: llukas22/finance-news-scraper:0.0.4 87 | profiles: 88 | - all 89 | tty: true 90 | environment: 91 | - NEWSSCRAPER_SCRAPE_NEWS=true #scrape news 92 | - NEWSSCRAPER_SENTIMENT_ANALYSIS=true #perfrom sentiment analysis on news (deactivate this if you dont have a container with a gpu) 93 | - NEWSSCRAPER_DOWNLOAD_RSS_FEED=false #Use the rss feeds from NEWSSCRAPER_RSS_DIR 94 | - NEWSSCRAPER_DOWNLOAD_GOOGLE_NEWS=true #Use Google news for the tickers found in STOCKSCRAPER_TICKERS_DIR 95 | - NEWSSCRAPER_DOWNLOAD_FINVIZ_NEWS=false #Use Finviz news for the tickers found in STOCKSCRAPER_TICKERS_DIR 96 | - NEWSSCRAPER_DEBUG=true #activate debug mode 97 | - NEWSSCRAPER_MODE=Scheduled #Single or Scheduled 98 | - NEWSSCRAPER_SENTIMENT_MODE=New #New or All 99 | - NEWSSCRAPER_SLEEPTIME=21600 #6 hours in seconds 100 | - STOCKSCRAPER_TICKERS_DIR=/var/lib/news-scraper/tickers 101 | - NEWSSCRAPER_RSS_DIR=/var/lib/news-scraper/rss 102 | #MongoDB Settings 103 | - NEWSSCRAPER_MONGODB_HOST=mongoDB 104 | - NEWSSCRAPER_MONGODB_PORT=27017 105 | - NEWSSCRAPER_MONGODB_USER=${MONGO_DB_USER} 106 | - NEWSSCRAPER_MONGODB_PASSWORD=${MONGO_DB_PASDWORD} 107 | - NEWSSCRAPER_MONGODB_DBNAME=news 108 | - NEWSSCRAPER_MONGODB_ARTICLE_COLLECTIONNAME=articles 109 | - NEWSSCRAPER_MONGODB_SENTIMENT_COLLECTIONNAME=sentiments 110 | #Sentiment Analysis Settings 111 | - NEWSSCRAPER_SENTIMENT_SEQUENZMODEL=ProsusAI/finbert #Huggingface model to use 112 | - NEWSSCRAPER_SENTIMENT_TOKENIZER=ProsusAI/finbert #Huggingface tokenizer to use 113 | - NEWSSCRAPER_MODEL_DIR=/var/lib/news-scraper/model #Directory where the model is stored 114 | volumes: 115 | - ./tickers:/var/lib/news-scraper/tickers 116 | - ./news:/var/lib/news-scraper/rss 117 | - ./sentiment_model:/var/lib/news-scraper/model 118 | depends_on: 119 | - mongoDB 120 | restart: 121 | unless-stopped 
122 | networks: 123 | - mongo-network 124 | 125 | networks: 126 | mongo-network: 127 | driver: bridge -------------------------------------------------------------------------------- /src/news/src/finance_news_scraper/sentiment.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import torch 3 | from torch.nn import functional as f 4 | from transformers import BertForSequenceClassification, BertTokenizer 5 | import numpy as np 6 | import os 7 | 8 | MAX_LENGTH = 512 9 | START_TOKEN = 101 10 | STOP_TOKEN = 102 11 | PADDING_TOKEN = 0 12 | ACTIVE_MASK = 1 13 | IGNORE_MASK = 0 14 | CLASSES = [-1,0,1] 15 | TOKENIZER_MODEL = os.getenv('NEWSSCRAPER_SENTIMENT_TOKENIZER',"ProsusAI/finbert") 16 | SEQUENZECLASSIFICATION_MODEL = os.getenv('NEWSSCRAPER_SENTIMENT_SEQUENZMODEL',"ProsusAI/finbert") 17 | MODEL_DIR = os.path.abspath(os.getenv('NEWSSCRAPER_MODEL_DIR',"../../../sentiment_model")) 18 | 19 | 20 | class SentimentProvider(object): 21 | tokenizer: BertTokenizer 22 | model: torch.jit._script.RecursiveScriptModule 23 | def __init__(self) -> None: 24 | self.tokenizer = None 25 | self.model = None 26 | os.makedirs(MODEL_DIR,exist_ok=True) 27 | 28 | @property 29 | def is_model_loaded(self)->bool: 30 | return self.model is not None and self.tokenizer is not None 31 | 32 | def load_model(self)->None: 33 | self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 34 | self.tokenizer = BertTokenizer.from_pretrained(TOKENIZER_MODEL) 35 | os.makedirs(MODEL_DIR,exist_ok=True) 36 | if not os.path.isfile(os.path.join(MODEL_DIR,"model.pt")): 37 | #build the torchscript model to gain some speed 38 | logging.info("Building torchscript model ...") 39 | model = BertForSequenceClassification.from_pretrained(SEQUENZECLASSIFICATION_MODEL) 40 | model.eval() 41 | input_ids = torch.rand((1, MAX_LENGTH)).long() 42 | attention_mask = torch.rand((1, MAX_LENGTH)).int() 43 | self.model = torch.jit.trace(model, [input_ids,attention_mask],strict=False) 44 | self.model.save(os.path.join(MODEL_DIR,"model.pt")) 45 | logging.info("Finished torchscript model!") 46 | else: 47 | self.model = torch.jit.load(os.path.join(MODEL_DIR,"model.pt")) 48 | 49 | self.model.eval() 50 | self.model = self.model.to(self.device) 51 | 52 | def dispose_model(self)->None: 53 | del self.model 54 | self.model = None 55 | del self.tokenizer 56 | self.tokenizer = None 57 | 58 | def get_sentiment(self,text:str) -> tuple[int,np.ndarray]: 59 | """ 60 | Computes the class and the probability of the logits for the given text. 61 | """ 62 | if not self.is_model_loaded: 63 | self.load_model() 64 | 65 | with torch.no_grad(): 66 | tokenized = self.encode(text) 67 | prediction = f.softmax(self.model(tokenized['input_ids'],tokenized['attention_mask'])['logits'], dim=-1).mean(dim=0).cpu().numpy() 68 | predicted_class = CLASSES[np.argmax(prediction)] 69 | return predicted_class,prediction 70 | 71 | 72 | def encode(self,text:str): 73 | """ 74 | Uses the tokenizer to build MAX_LENGTH long slices of the text. 
75 | """ 76 | with torch.no_grad(): 77 | tokenized = self.tokenizer.encode_plus(text,add_special_tokens=False,return_tensors="pt") 78 | #to support longer texts we split the sequence and pad it manually => then we pass it to the model 79 | split_length = MAX_LENGTH-2 80 | input_id_chunks = tokenized['input_ids'][0].split(split_length) 81 | mask_chunks = tokenized['attention_mask'][0].split(split_length) 82 | 83 | padded_ids= [] 84 | padded_masks = [] 85 | for i in range(len(input_id_chunks)): 86 | padded_ids.append(torch.cat([torch.Tensor([START_TOKEN]),input_id_chunks[i],torch.Tensor([STOP_TOKEN])])) 87 | padded_masks.append(torch.cat([torch.Tensor([ACTIVE_MASK]),mask_chunks[i],torch.Tensor([ACTIVE_MASK])])) 88 | 89 | for i in range(len(padded_ids)): 90 | padding_length = MAX_LENGTH - len(padded_ids[i]) 91 | if padding_length > 0: 92 | padded_ids[i] = torch.cat([padded_ids[i],torch.Tensor([PADDING_TOKEN]*padding_length)]) 93 | padded_masks[i] = torch.cat([padded_masks[i],torch.Tensor([IGNORE_MASK]*padding_length)]) 94 | 95 | input_ids = torch.stack(padded_ids).long().to(self.device) 96 | attention_mask = torch.stack(padded_masks).int().to(self.device) 97 | return{ 98 | 'input_ids': input_ids, 99 | 'attention_mask': attention_mask 100 | } 101 | 102 | 103 | if __name__ == "__main__": 104 | sentimentProvider = SentimentProvider() 105 | sentimentProvider.load_model() 106 | sentimentProvider.get_sentiment("Hello World "*1000) 107 | -------------------------------------------------------------------------------- /src/stocks/src/finance_stock_scraper/__main__.py: -------------------------------------------------------------------------------- 1 | import time 2 | import os 3 | import pytz 4 | import pandas_market_calendars as mcal 5 | import logging 6 | from datetime import datetime, timedelta 7 | from finance_stock_scraper.ExecutionContext import ExecutionContext 8 | from finance_stock_scraper.QuestClient import QuestClient 9 | from finance_stock_scraper.TickerRepository import TickerRepository 10 | from finance_stock_scraper.YFDataProvider import YFDataProvider 11 | from finance_stock_scraper.model.Ticker import Ticker 12 | from finance_stock_scraper.workflow import gather_data 13 | 14 | 15 | 16 | TICKERS_DIR = os.path.abspath(os.getenv('STOCKSCRAPER_TICKERS_DIR',"../../../Tickers")) 17 | DEBUG = os.getenv('STOCKSCRAPER_DEBUG',"False").upper() == "TRUE" 18 | MODE = os.getenv('STOCKSCRAPER_MODE',"Single").upper() # Single or Scheduled 19 | SLEEP_TIME = int(os.getenv('STOCKSCRAPER_SLEEPTIME',60*60*3)) # 3 hours 20 | 21 | if __name__ == "__main__": 22 | 23 | logging.getLogger("requests").setLevel(logging.WARNING) 24 | logging.getLogger("urllib3").setLevel(logging.WARNING) 25 | 26 | if DEBUG: 27 | logging.basicConfig(format='[%(asctime)s] %(levelname)s - %(message)s',level=logging.DEBUG) 28 | else: 29 | logging.basicConfig(format='[%(asctime)s] %(levelname)s - %(message)s',level=logging.INFO) 30 | 31 | logging.info(f"---Starting Scraper---") 32 | logging.info(f"TICKERS_DIR:{TICKERS_DIR}") 33 | logging.info(f"MODE:{MODE}") 34 | logging.info(f"SLEEP_TIME:{SLEEP_TIME}") 35 | 36 | # Create QuestClient 37 | questClient = QuestClient() 38 | retries = 0 39 | while True: 40 | if questClient.health_check(): 41 | break 42 | else: 43 | logging.warning(f"Could not establish connection to QuestDB! 
Retrying ...") 44 | retries += 1 45 | time.sleep(2) 46 | 47 | if retries > 10: 48 | raise Exception("Could not connect to QuestDB!") 49 | 50 | if not os.path.isdir(TICKERS_DIR): 51 | raise Exception(f"Tickers Directory '{TICKERS_DIR}' does not exist!") 52 | 53 | 54 | ticker_repo = TickerRepository(questClient) 55 | ticker_repo.load_tickers(TICKERS_DIR) 56 | 57 | logging.info("Loaded Tickers:") 58 | for exchange in ticker_repo.exchanges: 59 | logging.info(f"{exchange}:") 60 | logging.info(",".join([ticker for ticker in ticker_repo.exchanges[exchange]])) 61 | 62 | yfDataProvider = YFDataProvider() 63 | 64 | executionContext = ExecutionContext(ticker_repo, yfDataProvider, questClient) 65 | 66 | # if its in single mode, we will run the gathering process once and then exit 67 | if MODE == "SINGLE": 68 | for exchange in ticker_repo.exchanges: 69 | now = datetime.now().astimezone(pytz.utc) 70 | gather_data(exchange, executionContext,now) 71 | else: 72 | # otherwise, we will run the gathering process in a loop 73 | last_runs = {} 74 | while True: 75 | try: 76 | #check all exchanges and if we are are after the tradingtimes we start the gathering process 77 | now = datetime.now().astimezone(pytz.utc) 78 | for exchange in ticker_repo.exchanges: 79 | 80 | #Check if we already run the gathering process for this exchange today 81 | if exchange in last_runs: 82 | last_run = last_runs[exchange] 83 | if now-last_run < timedelta(hours=23,minutes=45): 84 | continue 85 | 86 | calender = mcal.get_calendar(exchange) 87 | schedule = calender.schedule(start_date=now, end_date=now) 88 | if len(schedule) == 0: 89 | #Not a trading day => nothing to fetch 90 | continue 91 | closing_time = schedule["market_close"][0] 92 | #Yahoo Finance can have a delay of 15-30 Minutes for the data to be available => we add 30 minutes to the closing time 93 | closing_time += timedelta(minutes=30) 94 | if now > closing_time: 95 | #we are after the closing time => we start the gathering process 96 | last_runs[exchange] = now 97 | logging.info(f"Starting gathering process for exchange {exchange}...") 98 | gather_data(exchange, executionContext, now) 99 | logging.info(f"Finished gathering process for exchange {exchange}!") 100 | 101 | #sleep for a while 102 | logging.info(f"Sleeping for {SLEEP_TIME} Seconds!") 103 | time.sleep(SLEEP_TIME) 104 | except Exception as e: 105 | logging.error(e) 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | -------------------------------------------------------------------------------- /src/stocks/src/finance_stock_scraper/QuestClient.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime,date 2 | import os 3 | from questdb.ingress import Sender, TimestampNanos, Buffer 4 | import requests 5 | from requests import Response 6 | import pytz 7 | from finance_stock_scraper.model.Ticker import Ticker 8 | from finance_stock_scraper.model.Intervals import INTERVALS, IntervalTypes 9 | 10 | 11 | HOST = os.getenv('STOCKSCRAPER_QUESTDB_HOST','localhost') 12 | INFLUX_LINE_PROTOCOL_PORT = os.getenv('STOCKSCRAPER_QUESTDB_ILP_PORT',9009) 13 | REST_PORT = os.getenv('STOCKSCRAPER_QUESTDB_PORT',9000) 14 | MONITORING_PORT = os.getenv('STOCKSCRAPER_QUESTDB_MONITORING_PORT',9003) 15 | 16 | class QuestClient(object): 17 | def __init__(self,host:str=HOST,port:int=REST_PORT,ilp_port:int=INFLUX_LINE_PROTOCOL_PORT,monitoring_port:int=MONITORING_PORT)-> None: 18 | self.host = host 19 | self.ilp_port = ilp_port 20 | self.port = port 21 | self.monitoring_port = 
monitoring_port 22 | 23 | def health_check(self)-> bool: 24 | try: 25 | return requests.get(f"http://{self.host}:{self.monitoring_port}/status").status_code == 200 26 | except: 27 | return False 28 | 29 | 30 | 31 | def _format_time(self,time:datetime)-> str: 32 | return time.strftime("%Y-%m-%dT%H:%M:%S.%fZ") 33 | 34 | def create_table(self,interval:str)-> None: 35 | if interval not in INTERVALS: 36 | raise Exception(f"Interval {interval} is not supported") 37 | interval_type = INTERVALS[interval] 38 | 39 | query = f"CREATE TABLE IF NOT EXISTS 'interval_{interval}'"\ 40 | "("\ 41 | "exchange Symbol,"\ 42 | "ticker Symbol,"\ 43 | "open float,"\ 44 | "high float,"\ 45 | "low float,"\ 46 | "close float,"\ 47 | "adj_close float,"\ 48 | "volume long,"\ 49 | "timestamp TIMESTAMP"\ 50 | "),"\ 51 | "index (ticker)"\ 52 | "timestamp(timestamp)"\ 53 | f"PARTITION BY {'YEAR' if interval_type == IntervalTypes.Daily else 'MONTH'};" 54 | self.raw_query(query) 55 | 56 | 57 | 58 | 59 | def get_existing_tickers_for_interval(self,interval:str,exchange:str)->list[str]: 60 | query = f"SELECT DISTINCT ticker FROM 'interval_{interval}' WHERE exchange = '{exchange}'" 61 | response = self.raw_query(query) 62 | if response.status_code == 200: 63 | return [ticker[0] for ticker in response.json()['dataset']] 64 | else: 65 | return [] 66 | 67 | 68 | def get_last_entry_dates(self,interval:str)-> dict[str,datetime]: 69 | query = f"SELECT ticker, timestamp FROM 'interval_{interval}'"\ 70 | "LATEST ON timestamp PARTITION BY ticker;" 71 | 72 | response = self.raw_query(query) 73 | last_entries = {} 74 | if response.status_code == 200: 75 | for ticker,time in response.json()['dataset']: 76 | last_entries[ticker] = datetime.strptime(time,"%Y-%m-%dT%H:%M:%S.%fZ").replace(tzinfo=pytz.UTC) 77 | return last_entries 78 | else: 79 | return last_entries 80 | 81 | def store_points(self,buffer:Buffer)-> None: 82 | if len(buffer) > 0: 83 | with Sender(self.host, self.ilp_port) as sender: 84 | sender.flush(buffer) 85 | 86 | def get_data(self,ticker:Ticker,interval:str,values:list[str]=["close"],start_date:datetime|None=None,end_date:datetime|None=None)-> None|dict: 87 | """ 88 | Querry data for the given ticker and interval 89 | """ 90 | selection = ["timestamp"]+values 91 | selection = ",".join(selection) 92 | query = f"SELECT {selection} FROM 'interval_{interval}'" 93 | query += "WHERE " 94 | if end_date and start_date: 95 | query += f"timestamp BETWEEN '{self._format_time(start_date)}' AND '{self._format_time(end_date)}' AND " 96 | elif end_date and not start_date: 97 | query += f"timestamp <= '{self._format_time(end_date)}' AND " 98 | elif start_date and not end_date: 99 | query += f"timestamp >= '{self._format_time(start_date)}' AND " 100 | 101 | query += f"ticker='{ticker.ticker}' AND exchange='{ticker.exchange}';" 102 | 103 | response = self.raw_query(query) 104 | if response.status_code == 200: 105 | return response.json() 106 | return None 107 | 108 | def raw_query(self,query:str)-> Response: 109 | return requests.get(f"http://{self.host}:{self.port}/exec?query=" + requests.utils.quote(query)) 110 | 111 | if __name__ == "__main__": 112 | questClient = QuestClient() 113 | questClient.get_last_entry_dates("1m") 114 | ticker = Ticker("GOOGL","NASDAQ") 115 | result = questClient.get_data(ticker, "1d",values=["close","volume"],start_date=datetime(year = 2010,month=1,day=1),end_date=datetime(year = 2011,month=1,day=1)) -------------------------------------------------------------------------------- 
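Note: the __main__ block in QuestClient.py above only exercises the query path. Below is a minimal sketch of the ingestion path, assuming the questdb Buffer/TimestampNanos API already imported in that module and the same interval_* row layout that workflow.py writes; the specific ticker, prices and volume are hypothetical placeholder values, not data from this repository.

from datetime import datetime
import pytz
from questdb.ingress import Buffer, TimestampNanos
from finance_stock_scraper.QuestClient import QuestClient

# Hypothetical example row; the schema mirrors QuestClient.create_table() / workflow.create_point()
buffer = Buffer()
ts = datetime(2022, 1, 3, 21, 0, tzinfo=pytz.UTC)
buffer.row(
    "interval_1d",  # table created by QuestClient.create_table("1d")
    symbols={"exchange": "NASDAQ", "ticker": "GOOGL"},
    columns={"open": 144.5, "high": 146.0, "low": 143.9, "close": 145.1, "adj_close": 145.1, "volume": 23_000_000},
    at=TimestampNanos(int(ts.timestamp() * 1_000_000_000)),
)

client = QuestClient()
client.store_points(buffer)  # flushes the buffered rows to QuestDB over the ILP port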
/src/news/src/tests/test_mongo_db_client.py: -------------------------------------------------------------------------------- 1 | from multiprocessing.context import assert_spawning 2 | import mongomock 3 | import pymongo 4 | from finance_news_scraper.mongo_client import MongoDBClient 5 | from finance_news_scraper.news_sources import News_Item 6 | from datetime import datetime 7 | from newspaper import Article 8 | import numpy as np 9 | import pytz 10 | import pandas as pd 11 | 12 | @mongomock.patch(servers=(('server.example.com', 27017),)) 13 | def test_client_creates_collections(): 14 | client = MongoDBClient('server.example.com',27017,better_compression=False) 15 | assert client.article_collection is not None 16 | assert client.sentiment_collection is not None 17 | 18 | @mongomock.patch(servers=(('server.example.com', 27017),)) 19 | def test_client_can_build_article(): 20 | client = MongoDBClient('server.example.com',27017,better_compression=False) 21 | news_item = News_Item('publisher', 'link', ["A", "B"], datetime.now()) 22 | article = Article(news_item.link) 23 | article.text = "Test Text" 24 | article.authors = ["Foo","Bar"] 25 | document = client.build_document(news_item,article) 26 | assert document['text'] == "Test Text" 27 | assert document['authors'] == ["Foo","Bar"] 28 | assert document['url'] == news_item.link 29 | assert document['hash'] == news_item.hash 30 | assert document['publisher'] == news_item.publisher 31 | assert document['tickers'] == news_item.tickers 32 | assert document['date'] == news_item.pub_date 33 | 34 | @mongomock.patch(servers=(('server.example.com', 27017),)) 35 | def test_client_can_insert_article(): 36 | client = MongoDBClient('server.example.com',27017,better_compression=False) 37 | news_item = News_Item('publisher', 'link', ["A", "B"], datetime.now()) 38 | article = Article(news_item.link) 39 | article.text = "Test Text" 40 | article.authors = ["Foo","Bar"] 41 | document = client.build_document(news_item,article) 42 | client.insert_article(document) 43 | inserted = client.article_collection.find_one() 44 | assert inserted['hash'] == news_item.hash 45 | 46 | 47 | @mongomock.patch(servers=(('server.example.com', 27017),)) 48 | def test_client_can_find_article_by_hash(): 49 | client = MongoDBClient('server.example.com',27017,better_compression=False) 50 | client.article_collection.insert_one({'hash':'foobar'}) 51 | found = client.find_article_by_hash('foobar') 52 | assert found is not None 53 | 54 | @mongomock.patch(servers=(('server.example.com', 27017),)) 55 | def test_client_can_insert_sentiment(): 56 | client = MongoDBClient('server.example.com',27017,better_compression=False) 57 | now = datetime.now() 58 | client.insert_sentiment((1,np.array([1,2,3])),'foobar',now,['A','B']) 59 | inserted = client.sentiment_collection.find_one() 60 | assert inserted['article_hash'] == 'foobar' 61 | assert inserted['tickers'] == ['A','B'] 62 | assert inserted['sentiment']['class'] == 1 63 | assert inserted['sentiment']['probabilities'] == [1,2,3] 64 | 65 | @mongomock.patch(servers=(('server.example.com', 27017),)) 66 | def test_client_can_get_articles(): 67 | client = MongoDBClient('server.example.com',27017,better_compression=False) 68 | client.article_collection.insert_many([ 69 | {"hash":"A","tickers":["A"]}, 70 | {"hash":"B","tickers":["A","B"]}, 71 | {"hash":"C","tickers":["B"]} 72 | ]) 73 | articles = list(client.get_articles(["A"])) 74 | assert articles is not None 75 | assert len(articles) == 2 76 | 77 | @mongomock.patch(servers=(('server.example.com', 
27017),)) 78 | def test_client_can_get_articles_with_startdate(): 79 | client = MongoDBClient('server.example.com',27017,better_compression=False) 80 | 81 | client.article_collection.insert_many([ 82 | {"hash":"A","tickers":["A"], "date":datetime(2018,1,1)}, 83 | {"hash":"B","tickers":["A"], "date":datetime(2019,1,1)}, 84 | {"hash":"C","tickers":["A"],"date":datetime(2020,1,1)}, 85 | {"hash":"D","tickers":["A"],"date":datetime(2021,1,1)} 86 | ]) 87 | articles = list(client.get_articles(["A"],start=datetime(2019,6,6))) 88 | assert articles is not None 89 | assert len(articles) == 2 90 | assert articles[0]['hash'] == 'D' 91 | assert articles[1]['hash'] == 'C' 92 | 93 | @mongomock.patch(servers=(('server.example.com', 27017),)) 94 | def test_client_can_get_articles_with_enddate(): 95 | client = MongoDBClient('server.example.com',27017,better_compression=False) 96 | 97 | client.article_collection.insert_many([ 98 | {"hash":"A","tickers":["A"], "date":datetime(2018,1,1)}, 99 | {"hash":"B","tickers":["A"], "date":datetime(2019,1,1)}, 100 | {"hash":"C","tickers":["A"],"date":datetime(2020,1,1)}, 101 | {"hash":"D","tickers":["A"],"date":datetime(2021,1,1)} 102 | ]) 103 | articles = list(client.get_articles(["A"],end=datetime(2019,6,6))) 104 | assert articles is not None 105 | assert len(articles) == 2 106 | assert articles[0]['hash'] == 'B' 107 | assert articles[1]['hash'] == 'A' 108 | 109 | @mongomock.patch(servers=(('server.example.com', 27017),)) 110 | def test_client_can_get_articles_with_start_and_enddate(): 111 | client = MongoDBClient('server.example.com',27017,better_compression=False) 112 | 113 | client.article_collection.insert_many([ 114 | {"hash":"A","tickers":["A"], "date":datetime(2018,1,1)}, 115 | {"hash":"B","tickers":["A"], "date":datetime(2019,1,1)}, 116 | {"hash":"C","tickers":["A"],"date":datetime(2020,1,1)}, 117 | {"hash":"D","tickers":["A"],"date":datetime(2021,1,1)}, 118 | {"hash":"E","tickers":["A"],"date":datetime(2022,1,1)} 119 | ]) 120 | articles = list(client.get_articles(["A"],start=datetime(2019,6,6),end=datetime(2021,6,6))) 121 | assert articles is not None 122 | assert len(articles) == 2 123 | assert articles[0]['hash'] == 'D' 124 | assert articles[1]['hash'] == 'C' 125 | 126 | @mongomock.patch(servers=(('server.example.com', 27017),)) 127 | def test_client_can_get_raw_sentiment(): 128 | client = MongoDBClient('server.example.com',27017,better_compression=False) 129 | 130 | client.sentiment_collection.insert_many([ 131 | {"tickers":["A"],"sentiment":{"class":1,'probabilities':[0,0,1] },"date":datetime(2018,1,1)}, 132 | {"tickers":["A"],"sentiment":{"class":1,'probabilities':[0,0,1] },"date":datetime(2019,1,1)}, 133 | {"tickers":["B"],"sentiment":{"class":1,'probabilities':[0,0,1] },"date":datetime(2020,1,1)}, 134 | ]) 135 | articles = list(client.get_raw_sentiments(["A"])) 136 | assert articles is not None 137 | assert len(articles) == 2 138 | 139 | @mongomock.patch(servers=(('server.example.com', 27017),)) 140 | def test_client_can_get_sentiment(): 141 | client = MongoDBClient('server.example.com',27017,better_compression=False) 142 | 143 | client.sentiment_collection.insert_many([ 144 | {"tickers":["A"],"sentiment":{"class":1,'probabilities':[0,0,1] },"date":datetime(2018,1,1)}, 145 | {"tickers":["A"],"sentiment":{"class":1,'probabilities':[0,0,1] },"date":datetime(2019,1,1)}, 146 | {"tickers":["B"],"sentiment":{"class":1,'probabilities':[0,0,1] },"date":datetime(2020,1,1)}, 147 | ]) 148 | df = client.get_sentiments(["A"],frequency='Y',fill_blanks=False) 149 | 
assert isinstance(df,pd.DataFrame) 150 | assert len(df) == 2 151 | 152 | 153 | @mongomock.patch(servers=(('server.example.com', 27017),)) 154 | def test_client_get_sentiment_interpolates_values(): 155 | client = MongoDBClient('server.example.com',27017,better_compression=False) 156 | 157 | client.sentiment_collection.insert_many([ 158 | {"tickers":["A"],"sentiment":{"class":1,'probabilities':[0,0,1] },"date":datetime(2018,1,1)}, 159 | {"tickers":["A"],"sentiment":{"class":0,'probabilities':[0,1,0] },"date":datetime(2019,1,1)}, 160 | {"tickers":["A"],"sentiment":{"class":-1,'probabilities':[1,0,0] },"date":datetime(2020,1,1)}, 161 | ]) 162 | df = client.get_sentiments("A",frequency='D',fill_blanks=True) 163 | assert isinstance(df,pd.DataFrame) 164 | assert len(df) == 731 165 | assert df.isna().sum().sum() == 0 166 | 167 | @mongomock.patch(servers=(('server.example.com', 27017),)) 168 | def test_client_get_sentiment_can_get_2_sentiments(): 169 | client = MongoDBClient('server.example.com',27017,better_compression=False) 170 | 171 | client.sentiment_collection.insert_many([ 172 | {"tickers":["A"],"sentiment":{"class":1,'probabilities':[0,0,1] },"date":datetime(2018,1,1)}, 173 | {"tickers":["A"],"sentiment":{"class":0,'probabilities':[0,1,0] },"date":datetime(2019,1,1)}, 174 | {"tickers":["B"],"sentiment":{"class":-1,'probabilities':[1,0,0] },"date":datetime(2020,1,1)}, 175 | ]) 176 | result = client.get_sentiments(["A","B"],frequency='Y',fill_blanks=False) 177 | assert isinstance(result,dict) 178 | assert len(result) == 2 179 | assert len(result['B']) == 1 180 | assert len(result['A']) == 2 -------------------------------------------------------------------------------- /src/news/src/finance_news_scraper/__main__.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import logging 4 | from finance_news_scraper.mongo_client import MongoDBClient 5 | from tqdm import tqdm 6 | from newspaper import Article 7 | from newspaper.article import ArticleException 8 | from finance_news_scraper.sentiment import SentimentProvider 9 | from finance_news_scraper.news_sources import get_rss_items,get_finviz_news_items,get_google_news_items 10 | from newspaper import Config 11 | import pandas as pd 12 | from finance_news_scraper.news_sources import News_Item 13 | from tqdm.contrib.concurrent import thread_map 14 | import time 15 | 16 | RSS_DIR = os.path.abspath(os.getenv('NEWSSCRAPER_RSS_DIR',"../../../news")) 17 | TICKERS_DIR = os.path.abspath(os.getenv('STOCKSCRAPER_TICKERS_DIR',"../../../Tickers")) 18 | DEBUG = os.getenv('NEWSSCRAPER_DEBUG',"True").upper() == "TRUE" 19 | MODE = os.getenv('NEWSSCRAPER_MODE',"Single").upper() # Single or Scheduled 20 | SLEEP_TIME = int(os.getenv('NEWSSCRAPER_SLEEPTIME',60*60*6)) # 6 hours 21 | SENTIMENT_MODE = os.getenv('NEWSSCRAPER_SENTIMENT_MODE',"ALL").upper() # ALL or NEW 22 | PERFORM_SENTIMENT_ANALYSIS = os.getenv('NEWSSCRAPER_SENTIMENT_ANALYSIS',"TRUE").upper() == "TRUE" 23 | PERFORM_NEWS_SCRAPING = os.getenv('NEWSSCRAPER_SCRAPE_NEWS',"TRUE").upper() == "TRUE" 24 | DOWNLOAD_RSS_FEED = os.getenv('NEWSSCRAPER_DOWNLOAD_RSS_FEED',"FALSE").upper() == "TRUE" 25 | DOWNLOAD_GOOGLE_NEWS = os.getenv('NEWSSCRAPER_DOWNLOAD_GOOGLE_NEWS',"TRUE").upper() == "TRUE" 26 | DOWNLOAD_FINVIZ_NEWS = os.getenv('NEWSSCRAPER_DOWNLOAD_FINVIZ_NEWS',"FALSE").upper() == "TRUE" 27 | 28 | USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:103.0) Gecko/20100101 Firefox/103.0' 29 | JS_ERROR_TEXTS = [ 30 | "please enable Javascript", 
31 | "Javascript is Disabled" 32 | ] 33 | 34 | if __name__ == "__main__": 35 | 36 | logging.getLogger("requests").setLevel(logging.WARNING) 37 | logging.getLogger("urllib3").setLevel(logging.WARNING) 38 | logging.getLogger("huggingface").setLevel(logging.WARNING) 39 | logging.getLogger("newspaper").setLevel(logging.WARNING) 40 | logging.getLogger("transformers").setLevel(logging.WARNING) 41 | logging.getLogger("httpx").setLevel(logging.WARNING) 42 | logging.getLogger("asyncio").setLevel(logging.WARNING) 43 | 44 | if DEBUG: 45 | logging.basicConfig(format='[%(asctime)s] %(levelname)s - %(message)s',level=logging.DEBUG) 46 | else: 47 | logging.basicConfig(format='[%(asctime)s] %(levelname)s - %(message)s',level=logging.INFO) 48 | 49 | logging.info(f"RSS_DIR:{RSS_DIR}") 50 | logging.info(f"TICKERS_DIR:{TICKERS_DIR}") 51 | 52 | os.makedirs(RSS_DIR,exist_ok=True) 53 | os.makedirs(TICKERS_DIR,exist_ok=True) 54 | 55 | rss_feeds = None 56 | rss_file = os.path.join(RSS_DIR,"rss-feeds.json") 57 | if os.path.isfile(rss_file): 58 | with open(rss_file) as f: 59 | rss_feeds = json.load(f) 60 | 61 | files = [file for file in os.listdir(TICKERS_DIR) if file.endswith(".csv")] 62 | logging.info(f"Found Ticker files: {','.join(files)}") 63 | 64 | 65 | tickers=[] 66 | for file in files: 67 | file = os.path.join(TICKERS_DIR,file) 68 | exchange = os.path.basename(file).split('.')[0].upper() 69 | local_tickers = pd.read_csv(file) 70 | for ticker in local_tickers.values: 71 | if ticker[0] is not None and isinstance(ticker[0],str): 72 | tickers.append((ticker[0],ticker[1],ticker[2])) 73 | 74 | config = Config() 75 | config.browser_user_agent = USER_AGENT 76 | config.request_timeout = 7 77 | config.fetch_images = False 78 | 79 | mongoClient = MongoDBClient() 80 | sentimentProvider = SentimentProvider() 81 | 82 | while True: 83 | news_items:list[News_Item] = [] 84 | 85 | if PERFORM_NEWS_SCRAPING: 86 | 87 | if DOWNLOAD_GOOGLE_NEWS and len(tickers) > 0: 88 | news_items += get_google_news_items(tickers[:10]) 89 | 90 | if DOWNLOAD_FINVIZ_NEWS and len(tickers) > 0: 91 | news_items += get_finviz_news_items(tickers) 92 | 93 | if DOWNLOAD_RSS_FEED and rss_feeds: 94 | for publisher,feed in tqdm(rss_feeds.items(),desc="RSS Feeds"): 95 | news_items += get_rss_items(feed,publisher) 96 | 97 | #Group by site hash 98 | grouped_news_items = {} 99 | for news_item in news_items: 100 | if news_item.hash in grouped_news_items: 101 | grouped_news_items[news_item.hash].append(news_item) 102 | else: 103 | grouped_news_items[news_item.hash] = [news_item] 104 | 105 | cleaned_news_items = [] 106 | 107 | #Collaps duplicates into a single item 108 | for hash,news_items in grouped_news_items.items(): 109 | news_item = news_items[0] 110 | tickers = [] 111 | for item in news_items: 112 | if item.tickers: 113 | tickers += item.tickers 114 | tickers = list(set(tickers)) 115 | news_item.tickers = tickers 116 | cleaned_news_items.append(news_item) 117 | 118 | def download_news_items(news_items:list[News_Item])->list[str]: 119 | 120 | hashes = [] 121 | to_store = [] 122 | for news_item in news_items: 123 | try: 124 | existing_entry = mongoClient.find_article_by_hash(news_item.hash) 125 | if existing_entry: 126 | #if needed update the tickers in the entry 127 | if mongoClient.update_article_tickers(existing_entry["_id"],existing_entry["tickers"],news_item.tickers): 128 | hashes.append(news_item.hash) 129 | continue 130 | 131 | article = Article(news_item.link,language="en",config=config, fetch_images=False) 132 | article.download() 133 | 
article.parse() 134 | 135 | # skip pages that only returned a JavaScript-required notice instead of the article text 136 | if any(js_error_text in article.text for js_error_text in JS_ERROR_TEXTS): 137 | logging.debug(f"JS ERROR: {news_item.link}") 138 | failed_downloads.append(news_item.link) 139 | continue 140 | 141 | to_store.append(mongoClient.build_document(news_item,article)) 142 | hashes.append(news_item.hash) 143 | 144 | except ArticleException as articleException: 145 | logging.debug(f"ArticleError: {articleException}") 146 | failed_downloads.append(news_item.link) 147 | 148 | except Exception as e: 149 | logging.error(f"{e}") 150 | continue 151 | 152 | mongoClient.insert_articles(to_store) 153 | return hashes 154 | 155 | 156 | hashes = [] 157 | failed_downloads = [] 158 | logging.info(f"Found {len(cleaned_news_items)} articles!") 159 | if len(cleaned_news_items) > 0: 160 | list_of_hashes = [] 161 | 162 | def chunks(lst, n): 163 | """Yield successive n-sized chunks from lst.""" 164 | for i in range(0, len(lst), n): 165 | yield lst[i:i + n] 166 | 167 | def flatten(l): 168 | return [item for sublist in l for item in sublist] 169 | 170 | list_of_hashes = thread_map(download_news_items,list(chunks(cleaned_news_items,25)),max_workers=16,desc="Downloading News Items") 171 | hashes = flatten(list_of_hashes) 172 | 173 | hashes = set([hash for hash in list(set(hashes)) if hash]) 174 | logging.info(f"Failed {len(failed_downloads)} Downloads!") 175 | logging.info(f"Stored {len(hashes)} items!") 176 | 177 | #Sentiment Analysis 178 | if PERFORM_SENTIMENT_ANALYSIS: 179 | logging.info(f"Start Sentiment Analysis") 180 | 181 | def analyse_article(hash:str): 182 | try: 183 | article = mongoClient.find_article_by_hash(hash) 184 | if article: 185 | existing_sentiment = mongoClient.find_sentiment_by_hash(hash) 186 | if existing_sentiment: 187 | #if needed update the tickers in the entry 188 | mongoClient.update_sentiment_tickers(existing_sentiment["_id"],existing_sentiment["tickers"],article["tickers"]) 189 | return 190 | 191 | sentiment = sentimentProvider.get_sentiment(article["text"]) 192 | mongoClient.insert_sentiment(sentiment,hash,article["date"],article["tickers"]) 193 | except Exception as e: 194 | logging.error(f"{e}") 195 | 196 | sentiments_to_calculate = [] 197 | 198 | if SENTIMENT_MODE == "ALL": 199 | #Get all Articles that have no sentiment 200 | sentiments_to_calculate = mongoClient.get_all_article_hashes().difference(mongoClient.get_all_sentiment_hashes()) 201 | else: 202 | #Only process the new articles 203 | sentiments_to_calculate = hashes 204 | 205 | if len(sentiments_to_calculate) > 0: 206 | for hash in tqdm(sentiments_to_calculate,desc="Sentiment Analysis"): 207 | analyse_article(hash) 208 | logging.info(f"Finished Sentiment Analysis!") 209 | 210 | if MODE == "SINGLE": 211 | break 212 | 213 | logging.info(f"Sleeping for {SLEEP_TIME} seconds ...") 214 | sentimentProvider.dispose_model() 215 | time.sleep(SLEEP_TIME) -------------------------------------------------------------------------------- /src/stocks/src/finance_stock_scraper/workflow.py: -------------------------------------------------------------------------------- 1 | from finance_stock_scraper.ExecutionContext import ExecutionContext 2 | from finance_stock_scraper.model.Intervals import INTERVALS,IntervalTypes 3 | from finance_stock_scraper.model.Ticker import Ticker 4 | import pytz 5 | import pandas as pd 6 | import os 7 | import logging 8 | import traceback 9 | from tqdm import tqdm 10 | import ctypes 11 | import numpy as np 12 | from questdb.ingress import TimestampNanos, Buffer 13 | from datetime import
datetime,date,timedelta 14 | 15 | CONFIGURED_INTERVALS = os.getenv("STOCKSCRAPER_INTERVALS","5m,1d").split(",") 16 | FLUX_PROTOCOL_MAX_INT = 2_147_483_647 #In theory this should be the int64 max but flux-line-protocol in quest db only supports up to int32 17 | 18 | def get_interval(interval:str)->IntervalTypes: 19 | if interval not in INTERVALS: 20 | raise ValueError(f"Unknown interval {interval}") 21 | return INTERVALS[interval] 22 | 23 | def interval_to_timedelta(interval:str)->timedelta: 24 | match interval: 25 | case "1m": 26 | return timedelta(minutes=1) 27 | case "2m": 28 | return timedelta(minutes=2) 29 | case "5m": 30 | return timedelta(minutes=5) 31 | case "15m": 32 | return timedelta(minutes=15) 33 | case "30m": 34 | return timedelta(minutes=30) 35 | case "60m": 36 | return timedelta(hours=1) 37 | case "90m": 38 | return timedelta(hours=1,minutes=30) 39 | case "1h": 40 | return timedelta(hours=1) 41 | case "1d": 42 | return timedelta(days=1) 43 | case "5d": 44 | return timedelta(days=5) 45 | case "1wk": 46 | return timedelta(weeks=1) 47 | case "1mo": 48 | return timedelta(days=30) 49 | case "3mo": 50 | return timedelta(days=90) 51 | case _: 52 | raise ValueError(f"Unknown interval {interval}") 53 | 54 | def difference(existing:list[str],tickers:list[Ticker])->list[Ticker]: 55 | return [ticker for ticker in tickers if ticker.ticker not in existing] 56 | 57 | def make_datetime_tz_aware(datetime:datetime): 58 | if datetime.tzinfo is not None and datetime.tzinfo.utcoffset(datetime) is not None: 59 | return datetime.astimezone(pytz.UTC) 60 | else: 61 | return pytz.utc.localize(datetime) 62 | 63 | def make_timestamp_tz_aware(timestamp:pd.Timestamp): 64 | if timestamp.tzinfo is not None and timestamp.tzinfo.utcoffset(timestamp) is not None: 65 | return timestamp.tz_convert(pytz.UTC) 66 | else: 67 | return timestamp.tz_localize(pytz.UTC) 68 | 69 | def create_point(timestamp:pd.Timestamp,row:pd.Series,ticker:Ticker,interval:str,buffer:Buffer,minimal_date:datetime=datetime(1, 1, 1))->bool: 70 | 71 | #Invalide Data skip this row 72 | if row.isnull().values.any(): 73 | return False 74 | 75 | timestamp = make_timestamp_tz_aware(timestamp) 76 | 77 | if timestamp.value <= 0 or timestamp <= minimal_date: 78 | return False 79 | 80 | buffer.row( 81 | f"interval_{interval}", 82 | symbols={ 83 | "exchange":ticker.exchange, 84 | "ticker":ticker.ticker 85 | }, 86 | columns={ 87 | "open":float(row["Open"]), 88 | "high":float(row["High"]), 89 | "low":float(row["Low"]), 90 | "close":float(row["Close"]), 91 | "adj_close":float(row["Adj Close"]), 92 | "volume": min(int(row["Volume"]),FLUX_PROTOCOL_MAX_INT) 93 | }, 94 | at=TimestampNanos(timestamp.value) 95 | ) 96 | return True 97 | 98 | def gather_data(exchange:str,executionContext:ExecutionContext,now:datetime=datetime.now()): 99 | """ 100 | Syncs the data of the given Exchange with the database 101 | """ 102 | for interval in CONFIGURED_INTERVALS: 103 | try: 104 | logging.info(f"Starting {exchange} - {interval}") 105 | executionContext.questClient.create_table(interval) 106 | flow(exchange,interval,executionContext,now) 107 | except Exception as e: 108 | logging.error(e) 109 | logging.debug(traceback.format_exc()) 110 | finally: 111 | logging.info(f"Finished {exchange} - {interval}") 112 | 113 | 114 | 115 | def download_in_slices(tickers:list[Ticker],interval:str,exchange:str,start:datetime,stop:datetime,executionContext:ExecutionContext,include_start:bool=True,slice_size:int=6)->pd.DataFrame: 116 | """ 117 | Some intraday data can only be downladed in 
slices of 6 days at a time => we have to download in slices if we want to pull the last 30 days 118 | """ 119 | dif = (stop-start).days 120 | offsets = list(range(0,dif,slice_size)) 121 | if dif not in offsets: 122 | offsets.append(dif) 123 | 124 | for i,offset in enumerate(offsets[:-1]): 125 | local_start = start+timedelta(days=offset) 126 | local_end = start+timedelta(days=offsets[i+1]) 127 | data,errors = executionContext.yfDataProcider.get_data([ticker.ticker for ticker in tickers],local_start,local_end,interval) 128 | handle_errors(errors,executionContext) 129 | #for many tickers (> 10,000) we get a lot of data (> 10GB) => we need to commit it to the database in slices 130 | if data is not None: 131 | store_points(data,tickers,f"Intraday Tickers (Slice {i+1}/{len(offsets[:-1])})",executionContext,interval,exchange,datetime(1, 1, 1) if include_start else start) 132 | else: 133 | joined_tickers = ",".join(ticker.ticker for ticker in tickers) 134 | logging.warning(f"Could not download data for {joined_tickers} from {local_start} to {local_end}") 135 | 136 | 137 | def store_points(data:pd.DataFrame,tickers:list[Ticker],message:str,executionContext:ExecutionContext,interval:str,exchange:str,minimal_date:datetime=datetime(1, 1, 1))->None: 138 | logging.info(f"[{message}] Storing Points ({interval}) for exchange {exchange} ...") 139 | minimal_date = make_datetime_tz_aware(minimal_date) 140 | stored_points = 0 141 | current_iteration = 0 142 | buffer = Buffer(init_capacity=1024*1024) 143 | for ticker in tqdm(tickers,f"[{message}] Storing Points ({interval}) for exchange {exchange} ..."): 144 | if ticker.ticker in data.columns: 145 | try: 146 | for timestamp,row in data[ticker.ticker].iterrows(): 147 | if create_point(timestamp,row,ticker,interval,buffer,minimal_date): 148 | current_iteration += 1 149 | 150 | if current_iteration > 30_000: 151 | executionContext.questClient.store_points(buffer) 152 | stored_points += current_iteration 153 | current_iteration = 0 154 | except Exception as e: 155 | logging.error(e) 156 | logging.debug(traceback.format_exc()) 157 | 158 | if current_iteration > 0: 159 | executionContext.questClient.store_points(buffer) 160 | stored_points += current_iteration 161 | current_iteration = 0 162 | 163 | logging.info(f"[{message}] Stored {stored_points} Points ({interval}) for exchange {exchange}!") 164 | 165 | 166 | 167 | def handle_errors(errors:dict,executionContext:ExecutionContext): 168 | """ 169 | Handles the yFinance errors and removes faulty tickers from the repository 170 | """ 171 | if len(errors)>0: 172 | logging.warning(f"{len(errors)} errors occurred") 173 | #TODO 174 | # removed = [] 175 | # for ticker,error in errors.items(): 176 | # if error == "No data found for this date range, symbol may be delisted": 177 | # if executionContext.tickerRepository.remove(ticker): 178 | # removed.append(ticker) 179 | # if len(removed) > 0: 180 | # logging.debug(f"Removed {','.join(removed)} from the repository!") 181 | 182 | 183 | def flow(exchange:str,interval:str,executionContext:ExecutionContext,now:datetime): 184 | """ 185 | Flow to download data for a given exchange and interval 186 | """ 187 | interval_type = get_interval(interval) 188 | time_delta = interval_to_timedelta(interval) 189 | 190 | tickers = list(executionContext.tickerRepository.exchanges[exchange].values()) 191 | if len(tickers) == 0: 192 | raise ValueError(f"No tickers found for exchange {exchange}") 193 | 194 | #First we check if the ticker is in the database; if not, we download the max from YFinance and add it 195 |
existing_tickers = executionContext.questClient.get_existing_tickers_for_interval(interval,exchange) 196 | tickers_to_gather = difference(existing_tickers,tickers) 197 | if len(tickers_to_gather) > 0: 198 | data = None 199 | if interval_type == IntervalTypes.Daily: 200 | data,errors = executionContext.yfDataProcider.get_data_from_period([ticker.ticker for ticker in tickers_to_gather],interval) 201 | handle_errors(errors,executionContext) 202 | if data is not None: 203 | store_points(data,tickers_to_gather,"New Tickers",executionContext,interval,exchange) 204 | else: 205 | #Maximum for Intraday is 30 days 206 | download_in_slices(tickers_to_gather,interval,exchange,now-timedelta(days=29),now,executionContext,) 207 | 208 | #If we already have data for a stock we just download the latest data 209 | #1. We ignore the stocks we just downloaded 210 | tickers_to_check = difference([ticker.ticker for ticker in tickers_to_gather],tickers) 211 | 212 | #2. Query the database for the last date we got data for each stock and group by datetime 213 | tickers_to_gather = {} 214 | last_entries = executionContext.questClient.get_last_entry_dates(interval) 215 | for ticker in tickers_to_check: 216 | if ticker.ticker in last_entries: 217 | last_date = last_entries[ticker.ticker] 218 | #this should never happen 219 | if last_date == now: 220 | continue 221 | 222 | if last_date not in tickers_to_gather: 223 | tickers_to_gather[last_date] = [ticker] 224 | else: 225 | tickers_to_gather[last_date].append(ticker) 226 | else: 227 | logging.warning(f"Can't find last date for {ticker.ticker}!") 228 | 229 | #3. Download the data from YFinance 230 | for date in tickers_to_gather: 231 | if now-date < time_delta: 232 | #We can't download data from the future (e.g. for a 7-day interval we can only download it 7 days after the last date) 233 | continue 234 | 235 | batched_tickers = tickers_to_gather[date] 236 | batched_tickers_names = [ticker.ticker for ticker in batched_tickers] 237 | data = None 238 | 239 | if interval_type == IntervalTypes.Intraday and now-date > timedelta(days=6): 240 | if now-date > timedelta(days=30): 241 | #we can only get the last 30 days 242 | logging.warning(f"The last entry for {','.join(batched_tickers_names)} is older than 30 days!
Only the last 30 days will be downloaded!") 243 | download_in_slices(batched_tickers,interval,exchange,now-timedelta(days=29),now,executionContext,include_start=False) 244 | else: 245 | #we have to download in slices 246 | download_in_slices(batched_tickers,interval,exchange,date,now,executionContext,include_start=False) 247 | else: 248 | data,errors = executionContext.yfDataProcider.get_data(batched_tickers_names,date,now,interval) 249 | handle_errors(errors,executionContext) 250 | if data is not None: 251 | store_points(data,batched_tickers,"Existing Tickers",executionContext,interval,exchange,date) 252 | -------------------------------------------------------------------------------- /src/news/src/finance_news_scraper/mongo_client.py: -------------------------------------------------------------------------------- 1 | import pymongo 2 | import os 3 | from finance_news_scraper.news_sources import News_Item 4 | from newspaper import Article 5 | import numpy as np 6 | from pymongo import ASCENDING, DESCENDING 7 | from pymongo.typings import _CollationIn, _DocumentIn, _DocumentType, _Pipeline 8 | from pymongo.cursor import Cursor 9 | from typing import Optional 10 | from datetime import datetime 11 | import pandas as pd 12 | 13 | HOST = os.getenv('NEWSSCRAPER_MONGODB_HOST',"localhost") 14 | PORT = int(os.getenv('NEWSSCRAPER_MONGODB_PORT',"27017")) 15 | USERNAME = os.getenv('NEWSSCRAPER_MONGODB_USERNAME',"admin") 16 | PASSWORD = os.getenv('NEWSSCRAPER_MONGODB_PASSWORD',"asda2sdqw12e4asfd") 17 | DB_NAME = os.getenv('NEWSSCRAPER_MONGODB_DBNAME',"news") 18 | ARTICLE_COLLECTION_NAME = os.getenv('NEWSSCRAPER_MONGODB_ARTICLE_COLLECTIONNAME',"articles") 19 | SENTIMENT_COLLECTION_NAME = os.getenv('NEWSSCRAPER_MONGODB_SENTIMENT_COLLECTIONNAME',"sentiments") 20 | 21 | class MongoDBClient(object): 22 | def __init__(self,host:str=HOST,port:int=PORT,username:str=USERNAME,password:str=PASSWORD,better_compression=True) -> None: 23 | self.client = pymongo.MongoClient(host=host, port=port, username=username, password=password) 24 | 25 | #create the db and collections with indexes 26 | self.db = self.client[DB_NAME] 27 | collections = self.db.list_collection_names() 28 | if ARTICLE_COLLECTION_NAME not in collections: 29 | if better_compression: 30 | self.db.create_collection(ARTICLE_COLLECTION_NAME,storageEngine={"wiredTiger": {"configString": "block_compressor=zstd"}}) 31 | else: 32 | self.db.create_collection(ARTICLE_COLLECTION_NAME) 33 | self.article_collection = self.db[ARTICLE_COLLECTION_NAME] 34 | self.article_collection.create_index([("date",DESCENDING)],background=True) 35 | self.article_collection.create_index([("hash",ASCENDING)],background=True) 36 | else: 37 | self.article_collection = self.db[ARTICLE_COLLECTION_NAME] 38 | 39 | 40 | if SENTIMENT_COLLECTION_NAME not in collections: 41 | if better_compression: 42 | self.db.create_collection(SENTIMENT_COLLECTION_NAME,storageEngine={"wiredTiger": {"configString": "block_compressor=zstd"}}) 43 | else: 44 | self.db.create_collection(SENTIMENT_COLLECTION_NAME) 45 | self.sentiment_collection = self.db[SENTIMENT_COLLECTION_NAME] 46 | self.sentiment_collection.create_index([("date",DESCENDING)],background=True) 47 | self.sentiment_collection.create_index([("article_hash",ASCENDING)],background=True) 48 | else: 49 | self.sentiment_collection = self.db[SENTIMENT_COLLECTION_NAME] 50 | 51 | 52 | def insert_sentiment(self,sentiment:tuple[int,np.ndarray],hash:str,datetime:datetime,tickers:list[str])->None: 53 | data={ 54 | 'article_hash':hash, 55 | 
'tickers':tickers, 56 | 'date':datetime, 57 | 'sentiment':{ 58 | 'class':sentiment[0], 59 | 'probabilities':sentiment[1].tolist() 60 | } 61 | } 62 | self.sentiment_collection.insert_one(data) 63 | 64 | def build_document(self,item:News_Item,article:Article)->dict: 65 | return { 66 | 'url': item.link, 67 | 'hash': item.hash, 68 | 'text': article.text, 69 | 'authors': article.authors, 70 | 'tickers': item.tickers, 71 | 'date': item.pub_date, 72 | 'publisher': item.publisher, 73 | } 74 | 75 | def insert_article(self,data:dict)->None: 76 | self.article_collection.insert_one(data) 77 | 78 | def insert_articles(self,articles:list[dict])->None: 79 | if len(articles) > 0: 80 | self.article_collection.insert_many(articles) 81 | 82 | def find_article_by_hash(self,hash:str)->Optional[_DocumentType]: 83 | """ 84 | Returns an article if it exists in the database 85 | """ 86 | return self.article_collection.find_one({'hash':hash}) 87 | 88 | def find_sentiment_by_hash(self,hash:str)->Optional[_DocumentType]: 89 | """ 90 | Returns a sentiment if it exists in the database 91 | """ 92 | return self.sentiment_collection.find_one({'article_hash':hash}) 93 | 94 | def get_all_article_hashes(self)->set[str]: 95 | return set(self.article_collection.distinct('hash')) 96 | 97 | def get_all_sentiment_hashes(self)->set[str]: 98 | return set(self.sentiment_collection.distinct('article_hash')) 99 | 100 | def __update_tickers(self,collection,_id:_DocumentIn,old_tickers:list,new_tickers:list)->bool: 101 | tickers=[] 102 | needs_update = False 103 | 104 | if new_tickers: 105 | if not old_tickers: 106 | needs_update = True 107 | tickers = list(set(new_tickers)) 108 | else: 109 | if not set(old_tickers) == set(new_tickers): 110 | needs_update = True 111 | tickers = list(set(old_tickers+new_tickers)) 112 | 113 | if needs_update: 114 | collection.update_one({'_id':_id},{'$set':{'tickers':tickers}}) 115 | return True 116 | return False 117 | 118 | def update_sentiment_tickers(self,_id:_DocumentIn,old_tickers:list,new_tickers:list)->bool: 119 | """ 120 | Updates the tickers of an sentiment if needed 121 | """ 122 | return self.__update_tickers(self.sentiment_collection,_id,old_tickers,new_tickers) 123 | 124 | 125 | def update_article_tickers(self,_id:_DocumentIn,old_tickers:list,new_tickers:list)->bool: 126 | """ 127 | Updates the tickers of an article if needed 128 | """ 129 | return self.__update_tickers(self.article_collection,_id,old_tickers,new_tickers) 130 | 131 | 132 | def __get_by_tickers_and_date(self,collection,tickers:list[str],start:datetime=None,end:datetime=None)->Cursor[_DocumentType]: 133 | """ 134 | Find all documents that match the tickers and the date range 135 | """ 136 | 137 | tickers = list(ticker.upper() for ticker in set(tickers)) 138 | if len(tickers) < 1: 139 | raise Exception("A ticker must be provided!") 140 | 141 | if start and end: 142 | result = collection.find({ 'tickers': { '$in': tickers },'date': {'$gte': start,'$lt': end}}) 143 | elif start and not end: 144 | result = collection.find({ 'tickers': { '$in': tickers },'date': {'$gte': start}}) 145 | elif end and not start: 146 | result = collection.find({ 'tickers': { '$in': tickers },'date': {'$lt': end}}) 147 | else: 148 | result = collection.find({ 'tickers': { '$in': tickers }}) 149 | return result.sort('date',DESCENDING) 150 | 151 | 152 | def get_articles(self,tickers:list[str],start:datetime=None,end:datetime=None)->Cursor[_DocumentType]: 153 | """ 154 | Find all articles that match the tickers and the date range 155 | """ 156 | return 
self.__get_by_tickers_and_date(self.article_collection,tickers,start,end) 157 | 158 | def get_raw_sentiments(self,tickers:list[str],start:datetime=None,end:datetime=None)->Cursor[_DocumentType]: 159 | """ 160 | Find all sentiments by tickers and the date range 161 | """ 162 | return self.__get_by_tickers_and_date(self.sentiment_collection,tickers,start,end) 163 | 164 | 165 | 166 | def __build_sentiment_dataframe(self,sentiments:list[dict])->pd.DataFrame: 167 | pd_data = [] 168 | for sentiment in sentiments: 169 | pd_data.append({"date":sentiment["date"],"sentiment":sentiment["sentiment"]["class"],"certainty":max(sentiment["sentiment"]["probabilities"])}) 170 | return pd.DataFrame(pd_data) 171 | 172 | 173 | 174 | def _get_weighted_sentiment(self,group:pd.DataFrame) -> float: 175 | if 'sentiment' in group.columns and 'certainty' in group.columns: 176 | return (group['sentiment'] * group['certainty']).mean() 177 | else: 178 | return np.nan 179 | 180 | 181 | 182 | def _raw_sentiment_to_dataframe(self, 183 | sentiments:list[dict], 184 | frequency:str="d", 185 | fill_blanks:bool=True, 186 | interpolate_values:bool=True, 187 | interpolation:str='linear')->pd.DataFrame: 188 | #create df 189 | df = self.__build_sentiment_dataframe(sentiments=sentiments) 190 | #set the period and mean the sentiment 191 | df = (df 192 | .groupby(df['date'].dt.to_period(frequency).dt.start_time) 193 | .apply(lambda x: self._get_weighted_sentiment(x)) 194 | .reset_index(name='sentiment') 195 | .set_index('date')) 196 | 197 | #create the times we don't have data for 198 | if fill_blanks: 199 | df = df.asfreq(frequency) 200 | 201 | #interpolate all NaN values 202 | if interpolate_values: 203 | df = df.interpolate(method=interpolation) 204 | 205 | return df 206 | 207 | 208 | def get_sentiments(self,tickers:str|list[str], 209 | start:datetime=None, 210 | end:datetime=None, 211 | frequency:str="d", 212 | fill_blanks:bool=True, 213 | interpolate_values:bool=True, 214 | interpolation:str='linear')->dict[str,pd.DataFrame]|pd.DataFrame: 215 | 216 | """ 217 | Retrieves the mean sentiment for the tickers and date range and returns a dataframe for each ticker. 218 | 219 | 'frequency': the frequency used to group the data. Can be 'd' for daily, 'w' for weekly, 'm' for monthly. For more options see here: https://pandas.pydata.org/docs/user_guide/timeseries.html#timeseries-offset-aliases 220 | 221 | 'fill_blanks': if True, the dataframe will be expanded to match the provided frequency. All created rows are initialized with NaN. 222 | 223 | 'interpolate_values': if True, all NaN values will be interpolated using the interpolation method. 224 | 225 | 'interpolation': Interpolation method. Can be 'linear' or another method.
See here: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.interpolate.html 226 | """ 227 | if tickers is None: 228 | raise Exception("A ticker must be provided!") 229 | 230 | if isinstance(tickers,str): 231 | tickers = [tickers] 232 | 233 | grouped_sentiments = {} 234 | data = self.get_raw_sentiments(tickers,start,end) 235 | ticker_set = set(tickers) 236 | 237 | #init the grouped sentiments 238 | for ticker in ticker_set: 239 | grouped_sentiments[ticker] = [] 240 | 241 | for sentiment in data: 242 | #get all tickers that match this sentiment 243 | sentiment_tickers = set(sentiment['tickers']) 244 | for matching in ticker_set & sentiment_tickers: 245 | grouped_sentiments[matching].append(sentiment) 246 | 247 | #build the dataframes 248 | 249 | dataframes = {} 250 | for ticker in ticker_set: 251 | df = self._raw_sentiment_to_dataframe(list(grouped_sentiments[ticker]),frequency,fill_blanks,interpolate_values,interpolation) 252 | dataframes[ticker] = df 253 | 254 | if len(dataframes) == 1: 255 | return next(iter(dataframes.values())) 256 | return dataframes 257 | 258 | 259 | 260 | -------------------------------------------------------------------------------- /tickers/nasdaq.csv: -------------------------------------------------------------------------------- 1 | tickers,shortNames,longNames 2 | BEN,"Franklin Resources, Inc.","Franklin Resources, Inc." 3 | CI,Cigna Corporation,Cigna Corporation 4 | CNC,Centene Corporation,Centene Corporation 5 | FRC,FIRST REPUBLIC BANK,First Republic Bank 6 | BXP,"Boston Properties, Inc.","Boston Properties, Inc." 7 | CMG,"Chipotle Mexican Grill, Inc.","Chipotle Mexican Grill, Inc." 8 | CFG,"Citizens Financial Group, Inc.","Citizens Financial Group, Inc." 9 | COST,Costco Wholesale Corporation,Costco Wholesale Corporation 10 | XYL,Xylem Inc.,Xylem Inc. 11 | T,AT&T Inc.,AT&T Inc. 12 | META,"Meta Platforms, Inc.","Meta Platforms, Inc." 13 | DXCM,"DexCom, Inc.","DexCom, Inc." 14 | BR,"Broadridge Financial Solutions,","Broadridge Financial Solutions, Inc." 15 | UNH,UnitedHealth Group Incorporated,UnitedHealth Group Incorporated 16 | ADM,Archer-Daniels-Midland Company,Archer-Daniels-Midland Company 17 | OTIS,Otis Worldwide Corporation,Otis Worldwide Corporation 18 | PKG,Packaging Corporation of Americ,Packaging Corporation of America 19 | SJM,J.M. Smucker Company (The) New,The J. M. Smucker Company 20 | CF,"CF Industries Holdings, Inc.","CF Industries Holdings, Inc." 21 | FBHS,"Fortune Brands Home & Security,","Fortune Brands Home & Security, Inc." 22 | ALB,Albemarle Corporation,Albemarle Corporation 23 | AVB,"AvalonBay Communities, Inc.","AvalonBay Communities, Inc." 24 | SRE,DBA Sempra,Sempra 25 | BSX,Boston Scientific Corporation,Boston Scientific Corporation 26 | CB,Chubb Limited,Chubb Limited 27 | ECL,Ecolab Inc.,Ecolab Inc. 28 | PHM,"PulteGroup, Inc.","PulteGroup, Inc." 29 | NDAQ,"Nasdaq, Inc.","Nasdaq, Inc." 30 | TWTR,"Twitter, Inc.","Twitter, Inc." 31 | EXPE,"Expedia Group, Inc.","Expedia Group, Inc." 32 | DRI,"Darden Restaurants, Inc.","Darden Restaurants, Inc." 33 | TT,Trane Technologies plc,Trane Technologies plc 34 | MMC,"Marsh & McLennan Companies, Inc","Marsh & McLennan Companies, Inc." 35 | CCI,Crown Castle Inc.,Crown Castle Inc. 36 | REGN,"Regeneron Pharmaceuticals, Inc.","Regeneron Pharmaceuticals, Inc." 
37 | STT,State Street Corporation,State Street Corporation 38 | VRTX,Vertex Pharmaceuticals Incorpor,Vertex Pharmaceuticals Incorporated 39 | BMY,Bristol-Myers Squibb Company,Bristol-Myers Squibb Company 40 | AEP,American Electric Power Company,"American Electric Power Company, Inc." 41 | CAT,"Caterpillar, Inc.",Caterpillar Inc. 42 | FCX,"Freeport-McMoRan, Inc.",Freeport-McMoRan Inc. 43 | DVA,DaVita Inc.,DaVita Inc. 44 | LUMN,"Lumen Technologies, Inc.","Lumen Technologies, Inc." 45 | COF,Capital One Financial Corporati,Capital One Financial Corporation 46 | XEL,Xcel Energy Inc.,Xcel Energy Inc. 47 | TEL,TE Connectivity Ltd. New Switze,TE Connectivity Ltd. 48 | PPL,PPL Corporation,PPL Corporation 49 | SCHW,Charles Schwab Corporation (The,The Charles Schwab Corporation 50 | DOW,Dow Inc.,Dow Inc. 51 | IDXX,"IDEXX Laboratories, Inc.","IDEXX Laboratories, Inc." 52 | NUE,Nucor Corporation,Nucor Corporation 53 | STX,Seagate Technology Holdings PLC,Seagate Technology Holdings plc 54 | LHX,"L3Harris Technologies, Inc.","L3Harris Technologies, Inc." 55 | DD,"DuPont de Nemours, Inc.","DuPont de Nemours, Inc." 56 | FMC,FMC Corporation,FMC Corporation 57 | MAR,Marriott International,"Marriott International, Inc." 58 | KR,Kroger Company (The),The Kroger Co. 59 | ACN,Accenture plc,Accenture plc 60 | SEE,Sealed Air Corporation,Sealed Air Corporation 61 | WTW,Willis Towers Watson Public Lim,Willis Towers Watson Public Limited Company 62 | MDLZ,"Mondelez International, Inc.","Mondelez International, Inc." 63 | HBAN,Huntington Bancshares Incorpora,Huntington Bancshares Incorporated 64 | VRSN,"VeriSign, Inc.","VeriSign, Inc." 65 | UNP,Union Pacific Corporation,Union Pacific Corporation 66 | EL,"Estee Lauder Companies, Inc. (T",The Estée Lauder Companies Inc. 67 | PVH,PVH Corp.,PVH Corp. 68 | ULTA,"Ulta Beauty, Inc.","Ulta Beauty, Inc." 69 | PAYC,"Paycom Software, Inc.","Paycom Software, Inc." 70 | FOXA,Fox Corporation,Fox Corporation 71 | RMD,ResMed Inc.,ResMed Inc. 72 | HIG,Hartford Financial Services Gro,"The Hartford Financial Services Group, Inc." 73 | EW,Edwards Lifesciences Corporatio,Edwards Lifesciences Corporation 74 | DG,Dollar General Corporation,Dollar General Corporation 75 | WDC,Western Digital Corporation,Western Digital Corporation 76 | QRVO,"Qorvo, Inc.","Qorvo, Inc." 77 | ETN,"Eaton Corporation, PLC",Eaton Corporation plc 78 | EFX,"Equifax, Inc.",Equifax Inc. 79 | GNRC,Generac Holdlings Inc.,Generac Holdings Inc. 80 | APA,APA Corporation,APA Corporation 81 | BRO,"Brown & Brown, Inc.","Brown & Brown, Inc." 82 | FOX,Fox Corporation,Fox Corporation 83 | ATO,Atmos Energy Corporation,Atmos Energy Corporation 84 | ADBE,Adobe Inc.,Adobe Inc. 85 | XOM,Exxon Mobil Corporation,Exxon Mobil Corporation 86 | PFG,Principal Financial Group Inc,"Principal Financial Group, Inc." 87 | CME,CME Group Inc.,CME Group Inc. 88 | IRM,Iron Mountain Incorporated (Del,Iron Mountain Incorporated 89 | LDOS,"Leidos Holdings, Inc.","Leidos Holdings, Inc." 90 | FITB,Fifth Third Bancorp,Fifth Third Bancorp 91 | ETR,Entergy Corporation,Entergy Corporation 92 | CBRE,CBRE Group Inc,"CBRE Group, Inc." 93 | PSA,Public Storage,Public Storage 94 | NVR,"NVR, Inc.","NVR, Inc." 95 | MSFT,Microsoft Corporation,Microsoft Corporation 96 | COP,ConocoPhillips,ConocoPhillips 97 | CZR,"Caesars Entertainment, Inc.","Caesars Entertainment, Inc." 98 | MO,"Altria Group, Inc.","Altria Group, Inc." 99 | RHI,Robert Half International Inc.,Robert Half International Inc. 100 | ROL,"Rollins, Inc.","Rollins, Inc." 
101 | WYNN,"Wynn Resorts, Limited","Wynn Resorts, Limited" 102 | TXT,Textron Inc.,Textron Inc. 103 | AAPL,Apple Inc.,Apple Inc. 104 | NEM,Newmont Corporation,Newmont Corporation 105 | SEDG,"SolarEdge Technologies, Inc.","SolarEdge Technologies, Inc." 106 | CE,Celanese Corporation Celanese C,Celanese Corporation 107 | EMR,Emerson Electric Company,Emerson Electric Co. 108 | ON,ON Semiconductor Corporation,ON Semiconductor Corporation 109 | MNST,Monster Beverage Corporation,Monster Beverage Corporation 110 | NKE,"Nike, Inc.","NIKE, Inc." 111 | GD,General Dynamics Corporation,General Dynamics Corporation 112 | IT,"Gartner, Inc.","Gartner, Inc." 113 | LRCX,Lam Research Corporation,Lam Research Corporation 114 | GRMN,Garmin Ltd.,Garmin Ltd. 115 | PEP,"Pepsico, Inc.","PepsiCo, Inc." 116 | EXR,Extra Space Storage Inc,Extra Space Storage Inc. 117 | MCO,Moody's Corporation,Moody's Corporation 118 | CBOE,"Cboe Global Markets, Inc.","Cboe Global Markets, Inc." 119 | JCI,Johnson Controls International ,Johnson Controls International plc 120 | VZ,Verizon Communications Inc.,Verizon Communications Inc. 121 | MMM,3M Company,3M Company 122 | ICE,Intercontinental Exchange Inc.,"Intercontinental Exchange, Inc." 123 | FLT,"FleetCor Technologies, Inc.","FLEETCOR Technologies, Inc." 124 | AMT,American Tower Corporation (REI,American Tower Corporation 125 | TRMB,Trimble Inc.,Trimble Inc. 126 | CTLT,"Catalent, Inc.","Catalent, Inc." 127 | DHI,"D.R. Horton, Inc.","D.R. Horton, Inc." 128 | DHR,Danaher Corporation,Danaher Corporation 129 | BWA,BorgWarner Inc.,BorgWarner Inc. 130 | PCAR,PACCAR Inc.,PACCAR Inc 131 | CSCO,"Cisco Systems, Inc.","Cisco Systems, Inc." 132 | O,Realty Income Corporation,Realty Income Corporation 133 | VICI,VICI Properties Inc.,VICI Properties Inc. 134 | TSN,"Tyson Foods, Inc.","Tyson Foods, Inc." 135 | CEG,Constellation Energy Corporatio,Constellation Energy Corporation 136 | JKHY,"Jack Henry & Associates, Inc.","Jack Henry & Associates, Inc." 137 | CTVA,"Corteva, Inc.","Corteva, Inc." 138 | DVN,Devon Energy Corporation,Devon Energy Corporation 139 | MAS,Masco Corporation,Masco Corporation 140 | FANG,"Diamondback Energy, Inc. - Comm","Diamondback Energy, Inc." 141 | MDT,Medtronic plc.,Medtronic plc 142 | CPB,Campbell Soup Company,Campbell Soup Company 143 | PENN,"PENN Entertainment, Inc.","PENN Entertainment, Inc." 144 | STZ,"Constellation Brands, Inc.","Constellation Brands, Inc." 145 | AMZN,"Amazon.com, Inc.","Amazon.com, Inc." 146 | GLW,Corning Incorporated,Corning Incorporated 147 | SNPS,"Synopsys, Inc.","Synopsys, Inc." 148 | MTCH,"Match Group, Inc.","Match Group, Inc." 149 | YUM,"Yum! Brands, Inc.","Yum! Brands, Inc." 150 | BBY,"Best Buy Co., Inc.","Best Buy Co., Inc." 151 | BLK,"BlackRock, Inc.","BlackRock, Inc." 152 | ILMN,"Illumina, Inc.","Illumina, Inc." 153 | AFL,AFLAC Incorporated,Aflac Incorporated 154 | ORCL,Oracle Corporation,Oracle Corporation 155 | GIS,"General Mills, Inc.","General Mills, Inc." 156 | TRV,"The Travelers Companies, Inc.","The Travelers Companies, Inc." 157 | RJF,"Raymond James Financial, Inc.","Raymond James Financial, Inc." 158 | SNA,Snap-On Incorporated,Snap-on Incorporated 159 | GPC,Genuine Parts Company,Genuine Parts Company 160 | WRB,W.R. Berkley Corporation,W. R. Berkley Corporation 161 | ETSY,"Etsy, Inc.","Etsy, Inc." 162 | CMI,Cummins Inc.,Cummins Inc. 163 | EXC,Exelon Corporation,Exelon Corporation 164 | MAA,Mid-America Apartment Communiti,"Mid-America Apartment Communities, Inc." 
165 | MOS,Mosaic Company (The),The Mosaic Company 166 | PLD,"Prologis, Inc.","Prologis, Inc." 167 | AIG,"American International Group, I","American International Group, Inc." 168 | DLR,"Digital Realty Trust, Inc.","Digital Realty Trust, Inc." 169 | CINF,Cincinnati Financial Corporatio,Cincinnati Financial Corporation 170 | MPWR,"Monolithic Power Systems, Inc.","Monolithic Power Systems, Inc." 171 | AAL,"American Airlines Group, Inc.",American Airlines Group Inc. 172 | FDS,FactSet Research Systems Inc.,FactSet Research Systems Inc. 173 | LH,Laboratory Corporation of Ameri,Laboratory Corporation of America Holdings 174 | ABMD,"ABIOMED, Inc.","Abiomed, Inc." 175 | FDX,FedEx Corporation,FedEx Corporation 176 | MTB,M&T Bank Corporation,M&T Bank Corporation 177 | TYL,"Tyler Technologies, Inc.","Tyler Technologies, Inc." 178 | BF-B,Brown Forman Inc,Brown-Forman Corporation 179 | EQR,Equity Residential,Equity Residential 180 | AWK,"American Water Works Company, I","American Water Works Company, Inc." 181 | FTNT,"Fortinet, Inc.","Fortinet, Inc." 182 | WBA,"Walgreens Boots Alliance, Inc.","Walgreens Boots Alliance, Inc." 183 | CMA,Comerica Incorporated,Comerica Incorporated 184 | AEE,Ameren Corporation,Ameren Corporation 185 | FE,FirstEnergy Corp.,FirstEnergy Corp. 186 | AES,The AES Corporation,The AES Corporation 187 | IP,International Paper Company,International Paper Company 188 | PXD,Pioneer Natural Resources Compa,Pioneer Natural Resources Company 189 | OKE,"ONEOK, Inc.","ONEOK, Inc." 190 | QCOM,QUALCOMM Incorporated,QUALCOMM Incorporated 191 | MCK,McKesson Corporation,McKesson Corporation 192 | ROK,"Rockwell Automation, Inc.","Rockwell Automation, Inc." 193 | RE,"Everest Re Group, Ltd.","Everest Re Group, Ltd." 194 | CL,Colgate-Palmolive Company,Colgate-Palmolive Company 195 | HD,"Home Depot, Inc. (The)","The Home Depot, Inc." 196 | AMGN,Amgen Inc.,Amgen Inc. 197 | ED,"Consolidated Edison, Inc.","Consolidated Edison, Inc." 198 | KHC,The Kraft Heinz Company,The Kraft Heinz Company 199 | NRG,"NRG Energy, Inc.","NRG Energy, Inc." 200 | PFE,"Pfizer, Inc.",Pfizer Inc. 201 | PNC,"PNC Financial Services Group, I","The PNC Financial Services Group, Inc." 202 | PKI,"PerkinElmer, Inc.","PerkinElmer, Inc." 203 | ISRG,"Intuitive Surgical, Inc.","Intuitive Surgical, Inc." 204 | BAX,Baxter International Inc.,Baxter International Inc. 205 | DISH,DISH Network Corporation,DISH Network Corporation 206 | D,"Dominion Energy, Inc.","Dominion Energy, Inc." 207 | SBUX,Starbucks Corporation,Starbucks Corporation 208 | XRAY,DENTSPLY SIRONA Inc.,DENTSPLY SIRONA Inc. 209 | CVS,CVS Health Corporation,CVS Health Corporation 210 | CCL,Carnival Corporation,Carnival Corporation & plc 211 | APD,"Air Products and Chemicals, Inc","Air Products and Chemicals, Inc." 212 | BKR,Baker Hughes Company,Baker Hughes Company 213 | GE,General Electric Company,General Electric Company 214 | HUM,Humana Inc.,Humana Inc. 215 | INTU,Intuit Inc.,Intuit Inc. 216 | LW,"Lamb Weston Holdings, Inc.","Lamb Weston Holdings, Inc." 217 | WY,Weyerhaeuser Company,Weyerhaeuser Company 218 | NSC,Norfolk Southern Corporation,Norfolk Southern Corporation 219 | NWL,Newell Brands Inc.,Newell Brands Inc. 220 | DOV,Dover Corporation,Dover Corporation 221 | NLSN,Nielsen N.V.,Nielsen Holdings plc 222 | EQIX,"Equinix, Inc.","Equinix, Inc." 223 | MRNA,"Moderna, Inc.","Moderna, Inc." 224 | ABBV,AbbVie Inc.,AbbVie Inc. 225 | RCL,D/B/A Royal Caribbean Cruises L,Royal Caribbean Cruises Ltd. 226 | PRU,"Prudential Financial, Inc.","Prudential Financial, Inc." 
227 | DFS,Discover Financial Services,Discover Financial Services 228 | HRL,Hormel Foods Corporation,Hormel Foods Corporation 229 | PSX,Phillips 66,Phillips 66 230 | PEAK,"Healthpeak Properties, Inc.","Healthpeak Properties, Inc." 231 | ENPH,"Enphase Energy, Inc.","Enphase Energy, Inc." 232 | WHR,Whirlpool Corporation,Whirlpool Corporation 233 | GWW,"W.W. Grainger, Inc.","W.W. Grainger, Inc." 234 | CDNS,"Cadence Design Systems, Inc.","Cadence Design Systems, Inc." 235 | SYK,Stryker Corporation,Stryker Corporation 236 | RF,Regions Financial Corporation,Regions Financial Corporation 237 | KMX,CarMax Inc,"CarMax, Inc." 238 | GOOG,Alphabet Inc.,Alphabet Inc. 239 | EA,Electronic Arts Inc.,Electronic Arts Inc. 240 | CLX,Clorox Company (The),The Clorox Company 241 | FFIV,"F5, Inc.","F5, Inc." 242 | L,Loews Corporation,Loews Corporation 243 | FRT,Federal Realty Investment Trust,Federal Realty Investment Trust 244 | CHRW,"C.H. Robinson Worldwide, Inc.","C.H. Robinson Worldwide, Inc." 245 | PWR,"Quanta Services, Inc.","Quanta Services, Inc." 246 | AJG,Arthur J. Gallagher & Co.,Arthur J. Gallagher & Co. 247 | BBWI,"Bath & Body Works, Inc.","Bath & Body Works, Inc." 248 | GOOGL,Alphabet Inc.,Alphabet Inc. 249 | LEN,Lennar Corporation,Lennar Corporation 250 | CARR,Carrier Global Corporation,Carrier Global Corporation 251 | ZTS,Zoetis Inc.,Zoetis Inc. 252 | SIVB,SVB Financial Group,SVB Financial Group 253 | TFX,Teleflex Incorporated,Teleflex Incorporated 254 | PNR,Pentair plc.,Pentair plc 255 | JNPR,"Juniper Networks, Inc.","Juniper Networks, Inc." 256 | TAP,Molson Coors Beverage Company,Molson Coors Beverage Company 257 | SYY,Sysco Corporation,Sysco Corporation 258 | HAS,"Hasbro, Inc.","Hasbro, Inc." 259 | VLO,Valero Energy Corporation,Valero Energy Corporation 260 | MCHP,Microchip Technology Incorporat,Microchip Technology Incorporated 261 | GS,"Goldman Sachs Group, Inc. (The)","The Goldman Sachs Group, Inc." 262 | SYF,Synchrony Financial,Synchrony Financial 263 | RL,Ralph Lauren Corporation,Ralph Lauren Corporation 264 | PG,Procter & Gamble Company (The),The Procter & Gamble Company 265 | PGR,Progressive Corporation (The),The Progressive Corporation 266 | MRO,Marathon Oil Corporation,Marathon Oil Corporation 267 | NXPI,NXP Semiconductors N.V.,NXP Semiconductors N.V. 268 | BKNG,Booking Holdings Inc. Common St,Booking Holdings Inc. 269 | PH,Parker-Hannifin Corporation,Parker-Hannifin Corporation 270 | BA,Boeing Company (The),The Boeing Company 271 | ARE,Alexandria Real Estate Equities,"Alexandria Real Estate Equities, Inc." 272 | KLAC,KLA Corporation,KLA Corporation 273 | ALL,Allstate Corporation (The),The Allstate Corporation 274 | LOW,"Lowe's Companies, Inc.","Lowe's Companies, Inc." 275 | ODFL,"Old Dominion Freight Line, Inc.","Old Dominion Freight Line, Inc." 276 | PAYX,"Paychex, Inc.","Paychex, Inc." 277 | OMC,Omnicom Group Inc.,Omnicom Group Inc. 278 | AZO,"AutoZone, Inc.","AutoZone, Inc." 279 | ITW,Illinois Tool Works Inc.,Illinois Tool Works Inc. 280 | ANSS,"ANSYS, Inc.","ANSYS, Inc." 281 | AON,Aon plc,Aon plc 282 | SBNY,Signature Bank,Signature Bank 283 | TSLA,"Tesla, Inc.","Tesla, Inc." 284 | KIM,Kimco Realty Corporation,Kimco Realty Corporation 285 | COO,"The Cooper Companies, Inc.","The Cooper Companies, Inc." 286 | LNC,Lincoln National Corporation,Lincoln National Corporation 287 | SWK,"Stanley Black & Decker, Inc.","Stanley Black & Decker, Inc." 288 | CRL,Charles River Laboratories Inte,"Charles River Laboratories International, Inc." 
289 | HES,Hess Corporation,Hess Corporation 290 | PARA,Paramount Global,Paramount Global 291 | IBM,International Business Machines,International Business Machines Corporation 292 | MET,"MetLife, Inc.","MetLife, Inc." 293 | SBAC,SBA Communications Corporation,SBA Communications Corporation 294 | STE,STERIS plc (Ireland),STERIS plc 295 | TFC,Truist Financial Corporation,Truist Financial Corporation 296 | KMB,Kimberly-Clark Corporation,Kimberly-Clark Corporation 297 | BALL,Ball Corporation,Ball Corporation 298 | KMI,"Kinder Morgan, Inc.","Kinder Morgan, Inc." 299 | CHD,"Church & Dwight Company, Inc.","Church & Dwight Co., Inc." 300 | DE,Deere & Company,Deere & Company 301 | CPT,Camden Property Trust,Camden Property Trust 302 | DRE,Duke Realty Corporation,Duke Realty Corporation 303 | JBHT,"J.B. Hunt Transport Services, I","J.B. Hunt Transport Services, Inc." 304 | ADP,"Automatic Data Processing, Inc.","Automatic Data Processing, Inc." 305 | VFC,V.F. Corporation,V.F. Corporation 306 | ROST,"Ross Stores, Inc.","Ross Stores, Inc." 307 | JPM,JP Morgan Chase & Co.,JPMorgan Chase & Co. 308 | HSY,The Hershey Company,The Hershey Company 309 | PYPL,"PayPal Holdings, Inc.","PayPal Holdings, Inc." 310 | ORLY,"O'Reilly Automotive, Inc.","O'Reilly Automotive, Inc." 311 | TROW,"T. Rowe Price Group, Inc.","T. Rowe Price Group, Inc." 312 | EMN,Eastman Chemical Company,Eastman Chemical Company 313 | AKAM,"Akamai Technologies, Inc.","Akamai Technologies, Inc." 314 | GPN,Global Payments Inc.,Global Payments Inc. 315 | NI,NiSource Inc,NiSource Inc. 316 | HON,Honeywell International Inc.,Honeywell International Inc. 317 | LUV,Southwest Airlines Company,Southwest Airlines Co. 318 | MLM,"Martin Marietta Materials, Inc.","Martin Marietta Materials, Inc." 319 | DGX,Quest Diagnostics Incorporated,Quest Diagnostics Incorporated 320 | AMD,"Advanced Micro Devices, Inc.","Advanced Micro Devices, Inc." 321 | NLOK,NortonLifeLock Inc.,NortonLifeLock Inc. 322 | OGN,Organon & Co.,Organon & Co. 323 | NWS,News Corporation,News Corporation 324 | APH,Amphenol Corporation,Amphenol Corporation 325 | AOS,A.O. Smith Corporation,A. O. Smith Corporation 326 | LYB,LyondellBasell Industries NV,LyondellBasell Industries N.V. 327 | SO,Southern Company (The),The Southern Company 328 | AVGO,Broadcom Inc.,Broadcom Inc. 329 | ALLE,Allegion plc,Allegion plc 330 | HWM,Howmet Aerospace Inc.,Howmet Aerospace Inc. 331 | CTAS,Cintas Corporation,Cintas Corporation 332 | PEG,Public Service Enterprise Group,Public Service Enterprise Group Incorporated 333 | HOLX,"Hologic, Inc.","Hologic, Inc." 334 | VRSK,"Verisk Analytics, Inc.","Verisk Analytics, Inc." 335 | DPZ,Domino's Pizza Inc,"Domino's Pizza, Inc." 336 | WM,"Waste Management, Inc.","Waste Management, Inc." 337 | TMUS,"T-Mobile US, Inc.","T-Mobile US, Inc." 338 | WST,"West Pharmaceutical Services, I","West Pharmaceutical Services, Inc." 339 | EOG,"EOG Resources, Inc.","EOG Resources, Inc." 340 | F,Ford Motor Company,Ford Motor Company 341 | MCD,McDonald's Corporation,McDonald's Corporation 342 | TECH,Bio-Techne Corp,Bio-Techne Corporation 343 | LYV,"Live Nation Entertainment, Inc.","Live Nation Entertainment, Inc." 344 | CDAY,Ceridian HCM Holding Inc.,Ceridian HCM Holding Inc. 345 | HCA,"HCA Healthcare, Inc.","HCA Healthcare, Inc." 346 | PPG,"PPG Industries, Inc.","PPG Industries, Inc." 347 | DTE,DTE Energy Company,DTE Energy Company 348 | HST,Host Hotels,"Host Hotels & Resorts, Inc." 349 | ADI,"Analog Devices, Inc.","Analog Devices, Inc." 
350 | PNW,Pinnacle West Capital Corporati,Pinnacle West Capital Corporation 351 | V,Visa Inc.,Visa Inc. 352 | NFLX,"Netflix, Inc.","Netflix, Inc." 353 | MPC,Marathon Petroleum Corporation,Marathon Petroleum Corporation 354 | EVRG,"Evergy, Inc.","Evergy, Inc." 355 | AME,"AMETEK, Inc.","AMETEK, Inc." 356 | DIS,Walt Disney Company (The),The Walt Disney Company 357 | VMC,Vulcan Materials Company (Holdi,Vulcan Materials Company 358 | MRK,"Merck & Company, Inc.","Merck & Co., Inc." 359 | DUK,Duke Energy Corporation (Holdin,Duke Energy Corporation 360 | NDSN,Nordson Corporation,Nordson Corporation 361 | AAP,Advance Auto Parts Inc.,"Advance Auto Parts, Inc." 362 | CMCSA,Comcast Corporation,Comcast Corporation 363 | INCY,Incyte Corporation,Incyte Corporation 364 | SPG,"Simon Property Group, Inc.","Simon Property Group, Inc." 365 | CDW,CDW Corporation,CDW Corporation 366 | EPAM,"EPAM Systems, Inc.","EPAM Systems, Inc." 367 | DLTR,"Dollar Tree, Inc.","Dollar Tree, Inc." 368 | SWKS,"Skyworks Solutions, Inc.","Skyworks Solutions, Inc." 369 | IPG,"Interpublic Group of Companies,","The Interpublic Group of Companies, Inc." 370 | CRM,"Salesforce, Inc.","Salesforce, Inc." 371 | NOW,"ServiceNow, Inc.","ServiceNow, Inc." 372 | GM,General Motors Company,General Motors Company 373 | APTV,Aptiv PLC,Aptiv PLC 374 | UPS,"United Parcel Service, Inc.","United Parcel Service, Inc." 375 | IFF,International Flavors & Fragran,International Flavors & Fragrances Inc. 376 | CSX,CSX Corporation,CSX Corporation 377 | HLT,Hilton Worldwide Holdings Inc.,Hilton Worldwide Holdings Inc. 378 | WFC,Wells Fargo & Company,Wells Fargo & Company 379 | FTV,Fortive Corporation,Fortive Corporation 380 | RTX,Raytheon Technologies Corporati,Raytheon Technologies Corporation 381 | BDX,"Becton, Dickinson and Company","Becton, Dickinson and Company" 382 | CMS,CMS Energy Corporation,CMS Energy Corporation 383 | ALGN,"Align Technology, Inc.","Align Technology, Inc." 384 | C,"Citigroup, Inc.",Citigroup Inc. 385 | EXPD,Expeditors International of Was,"Expeditors International of Washington, Inc." 386 | J,Jacobs Engineering Group Inc.,Jacobs Engineering Group Inc. 387 | MOH,Molina Healthcare Inc,"Molina Healthcare, Inc." 388 | VTR,"Ventas, Inc.","Ventas, Inc." 389 | DAL,"Delta Air Lines, Inc.","Delta Air Lines, Inc." 390 | JNJ,Johnson & Johnson,Johnson & Johnson 391 | MTD,"Mettler-Toledo International, I",Mettler-Toledo International Inc. 392 | HII,"Huntington Ingalls Industries, ","Huntington Ingalls Industries, Inc." 393 | MU,"Micron Technology, Inc.","Micron Technology, Inc." 394 | WELL,Welltower Inc.,Welltower Inc. 395 | BIIB,Biogen Inc.,Biogen Inc. 396 | ATVI,"Activision Blizzard, Inc","Activision Blizzard, Inc." 397 | TSCO,Tractor Supply Company,Tractor Supply Company 398 | VTRS,Viatris Inc.,Viatris Inc. 399 | URI,"United Rentals, Inc.","United Rentals, Inc." 400 | AVY,Avery Dennison Corporation,Avery Dennison Corporation 401 | FISV,"Fiserv, Inc.","Fiserv, Inc." 402 | NTRS,Northern Trust Corporation,Northern Trust Corporation 403 | MSCI,MSCI Inc,MSCI Inc. 404 | ESS,"Essex Property Trust, Inc.","Essex Property Trust, Inc." 405 | LMT,Lockheed Martin Corporation,Lockheed Martin Corporation 406 | RSG,"Republic Services, Inc.","Republic Services, Inc." 407 | NCLH,Norwegian Cruise Line Holdings ,Norwegian Cruise Line Holdings Ltd. 408 | NEE,"NextEra Energy, Inc.","NextEra Energy, Inc." 409 | GL,Globe Life Inc.,Globe Life Inc. 
410 | WRK,Westrock Company,WestRock Company 411 | LNT,Alliant Energy Corporation,Alliant Energy Corporation 412 | TTWO,"Take-Two Interactive Software, ","Take-Two Interactive Software, Inc." 413 | AMP,"Ameriprise Financial, Inc.","Ameriprise Financial, Inc." 414 | TER,"Teradyne, Inc.","Teradyne, Inc." 415 | HAL,Halliburton Company,Halliburton Company 416 | ZBH,"Zimmer Biomet Holdings, Inc.","Zimmer Biomet Holdings, Inc." 417 | DXC,DXC Technology Company,DXC Technology Company 418 | TGT,Target Corporation,Target Corporation 419 | KDP,Keurig Dr Pepper Inc.,Keurig Dr Pepper Inc. 420 | MGM,MGM Resorts International,MGM Resorts International 421 | CTRA,Coterra Energy Inc.,Coterra Energy Inc. 422 | BIO,"Bio-Rad Laboratories, Inc.","Bio-Rad Laboratories, Inc." 423 | CAH,"Cardinal Health, Inc.","Cardinal Health, Inc." 424 | NOC,Northrop Grumman Corporation,Northrop Grumman Corporation 425 | OXY,Occidental Petroleum Corporatio,Occidental Petroleum Corporation 426 | WAT,Waters Corporation,Waters Corporation 427 | USB,U.S. Bancorp,U.S. Bancorp 428 | KEYS,Keysight Technologies Inc.,"Keysight Technologies, Inc." 429 | BK,The Bank of New York Mellon Cor,The Bank of New York Mellon Corporation 430 | WBD,"Warner Bros. Discovery, Inc. - ","Warner Bros. Discovery, Inc." 431 | K,Kellogg Company,Kellogg Company 432 | CVX,Chevron Corporation,Chevron Corporation 433 | TDG,Transdigm Group Incorporated,TransDigm Group Incorporated 434 | ALK,"Alaska Air Group, Inc.","Alaska Air Group, Inc." 435 | BRK-B,Berkshire Hathaway Inc. New,Berkshire Hathaway Inc. 436 | LKQ,LKQ Corporation,LKQ Corporation 437 | FAST,Fastenal Company,Fastenal Company 438 | PTC,PTC Inc.,PTC Inc. 439 | SHW,Sherwin-Williams Company (The),The Sherwin-Williams Company 440 | INTC,Intel Corporation,Intel Corporation 441 | POOL,Pool Corporation,Pool Corporation 442 | ES,Eversource Energy (D/B/A),Eversource Energy 443 | ABT,Abbott Laboratories,Abbott Laboratories 444 | CPRT,"Copart, Inc.","Copart, Inc." 445 | GILD,"Gilead Sciences, Inc.","Gilead Sciences, Inc." 446 | TXN,Texas Instruments Incorporated,Texas Instruments Incorporated 447 | WMB,"Williams Companies, Inc. (The)","The Williams Companies, Inc." 448 | LLY,Eli Lilly and Company,Eli Lilly and Company 449 | IEX,IDEX Corporation,IDEX Corporation 450 | MHK,"Mohawk Industries, Inc.","Mohawk Industries, Inc." 451 | AXP,American Express Company,American Express Company 452 | NVDA,NVIDIA Corporation,NVIDIA Corporation 453 | TDY,Teledyne Technologies Incorpora,Teledyne Technologies Incorporated 454 | MKTX,"MarketAxess Holdings, Inc.",MarketAxess Holdings Inc. 455 | IR,Ingersoll Rand Inc.,Ingersoll Rand Inc. 456 | AMCR,Amcor plc,Amcor plc 457 | A,"Agilent Technologies, Inc.","Agilent Technologies, Inc." 458 | IQV,"IQVIA Holdings, Inc.",IQVIA Holdings Inc. 459 | IVZ,Invesco Ltd,Invesco Ltd. 460 | CTXS,"Citrix Systems, Inc.","Citrix Systems, Inc." 461 | TMO,Thermo Fisher Scientific Inc,Thermo Fisher Scientific Inc. 462 | KO,Coca-Cola Company (The),The Coca-Cola Company 463 | AIZ,"Assurant, Inc.","Assurant, Inc." 464 | HPE,Hewlett Packard Enterprise Comp,Hewlett Packard Enterprise Company 465 | PM,Philip Morris International Inc,Philip Morris International Inc. 466 | UDR,"UDR, Inc.","UDR, Inc." 467 | MA,Mastercard Incorporated,Mastercard Incorporated 468 | TPR,"Tapestry, Inc.","Tapestry, Inc." 469 | KEY,KeyCorp,KeyCorp 470 | NWSA,News Corporation,News Corporation 471 | NTAP,"NetApp, Inc.","NetApp, Inc." 472 | LIN,Linde plc,Linde plc 473 | TJX,"TJX Companies, Inc. (The)","The TJX Companies, Inc." 
474 | REG,Regency Centers Corporation,Regency Centers Corporation 475 | CTSH,Cognizant Technology Solutions ,Cognizant Technology Solutions Corporation 476 | ELV,"Elevance Health, Inc.",Elevance Health Inc. 477 | MSI,"Motorola Solutions, Inc.","Motorola Solutions, Inc." 478 | UAL,"United Airlines Holdings, Inc.","United Airlines Holdings, Inc." 479 | CAG,"ConAgra Brands, Inc.","Conagra Brands, Inc." 480 | MS,Morgan Stanley,Morgan Stanley 481 | SLB,Schlumberger N.V.,Schlumberger Limited 482 | UHS,"Universal Health Services, Inc.","Universal Health Services, Inc." 483 | ROP,"Roper Technologies, Inc.","Roper Technologies, Inc." 484 | WAB,Westinghouse Air Brake Technolo,Westinghouse Air Brake Technologies Corporation 485 | FIS,Fidelity National Information S,"Fidelity National Information Services, Inc." 486 | EIX,Edison International,Edison International 487 | HPQ,HP Inc.,HP Inc. 488 | HSIC,"Henry Schein, Inc.","Henry Schein, Inc." 489 | ABC,AmerisourceBergen Corporation,AmerisourceBergen Corporation 490 | MKC,"McCormick & Company, Incorporat","McCormick & Company, Incorporated" 491 | WEC,"WEC Energy Group, Inc.","WEC Energy Group, Inc." 492 | ZBRA,Zebra Technologies Corporation,Zebra Technologies Corporation 493 | EBAY,eBay Inc.,eBay Inc. 494 | ANET,"Arista Networks, Inc.","Arista Networks, Inc." 495 | ADSK,"Autodesk, Inc.","Autodesk, Inc." 496 | LVS,Las Vegas Sands Corp.,Las Vegas Sands Corp. 497 | ZION,Zions Bancorporation N.A.,"Zions Bancorporation, National Association" 498 | SPGI,S&P Global Inc.,S&P Global Inc. 499 | BAC,Bank of America Corporation,Bank of America Corporation 500 | WMT,Walmart Inc.,Walmart Inc. 501 | VNO,Vornado Realty Trust,Vornado Realty Trust 502 | CNP,"CenterPoint Energy, Inc (Holdin","CenterPoint Energy, Inc." 503 | AMAT,"Applied Materials, Inc.","Applied Materials, Inc." 504 | CHTR,"Charter Communications, Inc.","Charter Communications, Inc." 505 | --------------------------------------------------------------------------------
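The CSV above is a plain three-column list of ticker rows (symbol, abbreviated name, full company name). As a minimal sketch of how such a file could be consumed, the snippet below loads it with pandas and builds a symbol-to-company-name lookup; the column names (tickers, longNames) and the path ./tickers/nasdaq.csv are assumptions for illustration, not the repository's actual API.

    # Hypothetical helper (not part of the repository): load the ticker CSV shown above.
    # Assumes columns named "tickers" and "longNames" and the path "./tickers/nasdaq.csv".
    import pandas as pd

    def load_tickers(path: str = "./tickers/nasdaq.csv") -> dict[str, str]:
        """Return a mapping of ticker symbol -> full company name."""
        df = pd.read_csv(path)
        # Drop rows without a symbol and strip stray whitespace before building the map.
        df = df.dropna(subset=["tickers"])
        return dict(zip(df["tickers"].str.strip(), df["longNames"]))

    if __name__ == "__main__":
        tickers = load_tickers()
        print(f"{len(tickers)} tickers loaded, e.g. CHTR -> {tickers.get('CHTR')}")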