├── .gitignore
├── LICENSE
├── README.md
├── autoML.py
├── portfolio.py
├── requirements.txt
└── tutorials
    ├── intro_to_qf_with_python.ipynb
    ├── neural-prophet.ipynb
    ├── quantstats.ipynb
    └── riskfolio.ipynb

/.gitignore:
--------------------------------------------------------------------------------
# ignore the env folder (virtual environment that contains the package installations)
env

# ignore all .png files --> png files are generated when running the pycaret models
*.png

# ignore all .log files --> logs are generated when running pycaret models
*.log

# ignore lightning_logs generated when using the neural prophet model
lightning_logs

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2024 Jonathan Hofmann

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Introduction to Quant Finance in Python

Streamlit applications and Google Colaboratory notebooks with introductory material to quantitative finance in Python.

[autoML.py](https://github.com/hofmannj0n/Introduction-to-Quantitative-Finance-in-Python/blob/main/autoML.py) contains a Streamlit app for automated machine learning

[portfolio.py](https://github.com/hofmannj0n/Introduction-to-Quantitative-Finance-in-Python/blob/main/portfolio.py) contains a Streamlit app with an automatic portfolio optimizer & EDA

[tutorials](https://github.com/hofmannj0n/Introduction-to-Quantitative-Finance-in-Python/tree/main/tutorials) contains tutorials for various Python packages dealing with financial data and time series modeling

Inside the tutorials folder you will find notebooks with comprehensive looks at common trading algorithms, ARIMA modeling, and some of the more common finance libraries.

[Video tutorial located here](https://www.youtube.com/watch?v=ofzrvCPRRjw&t=5073s)

# Prerequisites

Make sure you have a code editor set up with Python >= 3.10 and a zsh/bash-based shell. VS Code is my editor of choice:

[VS Code](https://code.visualstudio.com/)

[Download Python](https://www.python.org/downloads/)

# Cloning the repository (macOS)
1. Clone the repository inside your code editor of choice

2. Set up a virtual environment in the terminal and activate it (make sure to select the environment as your workspace interpreter):
`python3.10 -m venv automl`
`source automl/bin/activate`

3. Install the dependencies:
`pip install -r requirements.txt`

4. Run the applications:
`streamlit run autoML.py`
`streamlit run portfolio.py`

5. Explore the Jupyter notebooks located in `tutorials` to further your understanding of quantitative finance

--------------------------------------------------------------------------------
/autoML.py:
--------------------------------------------------------------------------------
import yfinance as yf
import streamlit as st
import datetime
import plotly.express as px
from pycaret.regression import *
import matplotlib.pyplot as plt
import os

# global function to create datasets based on user inputs
def create_dataset(stock, start_date, end_date):
    stock_list = [stock]
    data = yf.download(tickers=stock_list, start=start_date, end=end_date)
    data = data.drop('Adj Close', axis=1)
    data['Ticker'] = stock
    data = data.dropna()
    return data

# cache the conversion to prevent computation on every rerun
@st.cache_data
def convert_df(df):
    return df.to_csv().encode('utf-8')

with st.sidebar:
    choice = st.radio('Select one:', [
        'Tutorial',
        'Data Selection',
        'Data Visualization',
        'Model Selection',
        'Download Model'
    ])

if choice == "Tutorial":
    with st.expander("Automated Model Selection using Pycaret", expanded=False):
        st.subheader("Steps:")
        st.write("*Data Selection*")
        st.write("- Select your market data for analysis")
        st.write("*Data Visualization*")
        st.write("- Visualize your data to identify a good target column")
        st.write("*Model Selection*")
        st.write("- Run various machine learning algorithms to determine the best predictor for your selected data")
        st.write("*Download Model*")
        st.write("- Download your most accurate model for future analysis")

    st.subheader("Created by Jonathan Hofmann ---> [github](https://github.com/hofmannj0n) | [portfolio](https://www.jhofmann.me/)")
    st.write("")
    st.write("Tutorial:")

if choice == "Data Selection":

    # data selection variables
    stocks = ["AAPL", "GOOGL", "IBM", "MSFT", "VIX", "VOO", "QQQ", "TSLA", "JPM", "AMZN", "VZ", "NVDA", "BAC", "SBUX", "NKE", "MA", "PLTR"]
    min_timeframe = datetime.datetime.fromisoformat("2015-01-01")
    max_timeframe = datetime.datetime.fromisoformat("2023-11-21")

    # widgets to get user input
    st.header('Data Selection', divider='rainbow')
    st.write("- Use selectbox and sliders to create your data")
    st.write("- Once satisfied with parameters - select \"Generate Data\" button")
    st.write("#")
    stock = st.selectbox("Select a security for analysis", stocks, key="stock")
    st.write("#")
    start_date = st.slider("Start date", min_value=min_timeframe, max_value=max_timeframe, key="start_date")
    st.write("#")
    end_date = st.slider("End date", min_value=start_date, max_value=max_timeframe, key="end_date")
    st.write("#")

    if st.button("Generate Data"):

        # create dataset
        data = create_dataset(stock, st.session_state.start_date, st.session_state.end_date)

        # save the generated data in st.session_state to be accessed in other sidebar options
        st.session_state.data = data
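        # (st.session_state survives Streamlit's top-to-bottom script rerun on every widget
        # interaction, so the other sidebar pages can reuse this DataFrame without re-downloading it)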
        st.success("Data generated and ready for analysis, navigate to Data Visualization in the sidebar")

        # show a preview of generated data
        st.write("Data preview:")
        st.dataframe(data.head(20))

if choice == "Data Visualization":

    # check if data is available in session state
    if "data" in st.session_state:
        data = st.session_state.data

        st.header('Data Visualizations', divider='rainbow')
        chosen_target = st.selectbox('Choose column to plot', data.columns[:5])

        if st.button("Plot"):
            df = st.session_state.data.copy()  # make a copy to not mess with the original dataset
            window_size = 30  # 30-day moving average window

            # moving average
            df['Moving_Avg'] = df[chosen_target].rolling(window=window_size).mean()

            # plot
            fig = px.line(
                df,
                y=chosen_target,
                title=f"{chosen_target} Line Chart",
                color_discrete_sequence=["#9EE6CF"],
            )

            fig.add_scatter(x=df.index, y=df['Moving_Avg'], mode='lines', name='30-day Moving Avg')
            st.plotly_chart(fig, use_container_width=True)
    else:
        st.warning("No data generated yet. Please select 'Data Selection' and generate data first.")

if choice == "Model Selection":

    if "data" in st.session_state:

        data = st.session_state.data
        chosen_target = st.selectbox('Choose the target column', data.columns[:5])

        if st.button('Run Modelling'):

            # initialize the pycaret regression experiment and rank candidate models
            setup(data, target=chosen_target, session_id=123)
            best_model = compare_models()

            # display a model summary
            st.write("Model Accuracy: (Ranked Most Accurate - to Least Accurate)")
            compare_df = pull()
            st.dataframe(compare_df.head(20))

            # display model plot
            img = plot_model(
                best_model, plot="error", display_format="streamlit", save=True
            )
            st.image(img)

            # model predictions on new data
            st.write("Model Predictions on New Data: (Using Most Accurate Model)")
            new_data = data.copy()
            new_data.drop(chosen_target, axis=1, inplace=True)
            predictions = predict_model(best_model, data=new_data)
            st.dataframe(predictions.head())

            # save best model to session_state
            st.session_state.best_model = best_model

    else:
        st.warning("No data generated yet. Please select 'Data Selection' and generate data first.")

if choice == "Download Model":

    if "best_model" in st.session_state:

        save_model(st.session_state.best_model, 'my_best_model')

        # path to the model file
        model_path = 'my_best_model.pkl'

        # checking if the file exists
        if os.path.exists(model_path):
            with open(model_path, "rb") as fp:
                btn = st.download_button(
                    label="Download Model",
                    data=fp,
                    file_name="best_model.pkl",
                    mime="application/octet-stream"
                )
        else:
            st.error("Model file not found")

        st.markdown(":blue[Next Steps:]")
        st.write("Check out this [jupyter notebook](https://colab.research.google.com/drive/11NqRw2AIDS8mZ-kjCbjTc36-T_7tnR34#scrollTo=zLIBdzWjdqq3) for a more in-depth look at quantitative finance in Python.")
        st.write("")
        st.write("Loading your best model for further analysis:")

        st.code("""
from pycaret.regression import load_model, predict_model

# Load the model you generated using this app
model = load_model('my_best_model')

# Score a DataFrame with the same feature columns as the training data
predictions = predict_model(model, data=new_data)
print(predictions)""", language="python")
    else:
        st.warning("No model detected, navigate to Model Selection")

--------------------------------------------------------------------------------
/portfolio.py:
--------------------------------------------------------------------------------
import numpy as np
import riskfolio as rp
import yfinance as yf
import matplotlib.pyplot as plt
import pandas as pd
import streamlit as st
import plotly.graph_objs as go
import quantstats as qs
import warnings
import datetime
warnings.filterwarnings("ignore")

from neuralprophet import NeuralProphet
from ydata_profiling import ProfileReport
from streamlit_pandas_profiling import st_profile_report


with st.sidebar:
    choice = st.radio("Select One:", [
        "Welcome!",
        "ARIMA Price Prediction",
        "Portfolio EDA & Optimization",
        "Next Steps",
    ])

if choice == "Welcome!":
    with st.expander("Introduction to stock market quantitative analysis in Python", expanded=False):
        st.write("Steps:")
        st.write("ARIMA Model Price Prediction")
        st.write("- Utilize ARIMA & [Neural Prophet](https://neuralprophet.com/) to explore trend analysis of your security of choice")
        st.write("Portfolio EDA & Optimization:")
        st.write("- Explore portfolio optimization strategies such as maximizing the Sharpe ratio, and backtest your optimal portfolio against $SPY")
        st.write("Next Steps:")
        st.write("- Navigate to this [jupyter notebook](https://colab.research.google.com/drive/11NqRw2AIDS8mZ-kjCbjTc36-T_7tnR34#scrollTo=zLIBdzWjdqq3) to expand your knowledge of market algorithmic analysis")

    st.subheader("Created by Jonathan Hofmann ---> [github](https://github.com/hofmannj0n) | [portfolio](https://www.jhofmann.me/)")
    st.write("")
    st.write("Tutorial:")

if choice == "ARIMA Price Prediction":
    st.title("Stock Market Prediction with ARIMA")

    image_url = "https://slideplayer.com/slide/4283195/14/images/69/ARIMA+models+for+time+series+data.jpg"

    link_url = "https://www.investopedia.com/terms/a/autoregressive-integrated-moving-average-arima.asp"

    # HTML code with image sizing
    html_str = f"""
    <a href="{link_url}" target="_blank">
        <img src="{image_url}" alt="Alt text" style="width:100%;">
    </a>
    """
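    # st.markdown escapes raw HTML by default, so unsafe_allow_html=True is needed for the banner to render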
    st.markdown(html_str, unsafe_allow_html=True)
    st.divider()

    st.info("""
The acronym ARIMA stands for AutoRegressive Integrated Moving Average, and it is a widely used forecasting method in financial analysis because it captures essential aspects of financial time series data such as trends, seasonality, and random fluctuations.

AutoRegressive (AR): This component leverages the dependencies between an observation and a certain number of lagged observations. It's like recognizing that weather conditions from previous days can influence the forecast for today.

Integrated (I): This component involves differencing the observations to make the time series stationary, meaning the previous observation is subtracted from the current one to deal with trends in the data. Continuing the weather example, it is like modeling how much the weather changes day by day rather than the total amount of rainfall.

Moving Average (MA): This component models the relationship between an observation and the residual errors from a moving average model applied to lagged observations. In effect, it smooths out noise in the data so the model can focus on more substantial trends and cycles.

The parameters of an ARIMA model are usually denoted as p, d, q:

p (Lags of the auto-regressive model): This parameter deals with the lag variables. Continuing with the weather forecasting analogy, you might look at how the past few days' weather can give you insights into tomorrow's weather.

d (Degree of differencing): This is the number of times the data needs to be differenced to become stationary. Comparing changes from day to day can model the data more effectively, especially if the weather has been following a consistent pattern.

q (Order of the moving average model): This is the size of the moving average window, used to smooth out short-term fluctuations and highlight longer-term trends or cycles in the dataset.
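
For example, an ARIMA(1,1,1) model applied to daily closing prices differences the series once (d=1) and then forecasts each day's change as a weighted combination of the previous day's change (p=1) and the previous day's forecast error (q=1).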
""")

    st.divider()
    st.subheader("Select a Stock and Utilize ARIMA Forecasting:")

    # stock input variables
    st.write("[List of all Stock Symbols](https://stockanalysis.com/stocks/)")
    stock = st.text_input("Enter Stock Symbol", "AAPL")
    start_date = st.date_input("Start Date", datetime.datetime(2015, 1, 1))
    end_date = st.date_input("End Date", datetime.datetime.now())

    if st.button("Predict Stock Close Price"):

        stocks = yf.download(stock, start=start_date, end=end_date)

        # reshape the dataframe to NeuralProphet's expected columns (ds = date, y = target)
        stocks.reset_index(inplace=True)
        stocks = stocks[['Date', 'Close']]
        stocks.columns = ['ds', 'y']

        # initializing the model
        m = NeuralProphet()
        m.fit(stocks, freq='B', epochs=100)

        future = m.make_future_dataframe(stocks, periods=365)
        forecast = m.predict(future)
        actual_prediction = m.predict(stocks)

        fig = go.Figure()

        # actual past data
        fig.add_trace(go.Scatter(x=stocks['ds'], y=stocks['y'], mode='lines', name='Actual Close Price', line=dict(color='green')))

        # predictions on past data
        fig.add_trace(go.Scatter(x=actual_prediction['ds'], y=actual_prediction['yhat1'], mode='lines', name='Predicted Close Price', line=dict(color='red')))

        # future predictions
        fig.add_trace(go.Scatter(x=forecast['ds'], y=forecast['yhat1'], mode='lines', name='Future Predictions', line=dict(color='blue')))

        # update plot layout
        fig.update_layout(title='Stock Market Predictions for ' + stock,
                          xaxis_title='Date',
                          yaxis_title='Stock Price',
                          legend_title='Legend',
                          xaxis_rangeslider_visible=False)

        st.plotly_chart(fig, use_container_width=True)
        st.divider()
        st.write("Observed, Trend, Seasonality:")
        st.plotly_chart(m.plot(forecast), use_container_width=True)
        st.plotly_chart(m.plot_components(forecast), use_container_width=True)
        st.divider()
        st.write("Profile Report:")
        profile = ProfileReport(stocks, title="Profiling Report")
        st_profile_report(profile)

if choice == "Portfolio EDA & Optimization":

    # common stocks
    stocks = ["AAPL", "GOOGL", "IBM", "MSFT", "VIX", "VOO", "QQQ", "TSLA", "JPM", "AMZN", "VZ", "NVDA", "BAC", "SBUX", "NKE", "MA", "PLTR"]

    st.subheader("Understanding The Sharpe Ratio:")

    image_url = "https://cdn.corporatefinanceinstitute.com/assets/sharpe-ratio.png"

    link_url = "https://corporatefinanceinstitute.com/resources/career-map/sell-side/risk-management/sharpe-ratio-definition-formula/"

    # HTML code with image sizing
    html_str = f"""
    <a href="{link_url}" target="_blank">
        <img src="{image_url}" alt="Alt text" style="width:100%;">
    </a>
    """

    st.markdown(html_str, unsafe_allow_html=True)
    st.divider()

    st.info("""
For example: A Sharpe ratio of 1.5 indicates that the investment is generating 1.5 units of excess return for each unit of risk taken, relative to the risk-free rate.
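In other words, Sharpe ratio = (average portfolio return - risk-free rate) / standard deviation of the portfolio's excess returns.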
A higher Sharpe ratio implies better risk-adjusted performance.""")

    stock = st.text_input("Enter Stock Symbol", "AAPL")
    if st.button("Get Sharpe Ratio & Historical Returns:"):

        # generating portfolio metrics
        stock_sharpe = qs.utils.download_returns(stock)
        st.write("Sharpe Ratio:")
        st.write(qs.stats.sharpe(stock_sharpe))
        st.write("Historical Returns:")
        st.write(qs.stats.monthly_returns(stock_sharpe))
        returns = qs.utils.download_returns(stock)
        fig = qs.plots.snapshot(returns, title=f'{stock} Performance', show=False)
        st.write(fig)

    st.subheader("Create Optimal Portfolio")

    start = "2015-01-01"
    end = "2024-01-01"

    # parameters for the mean-variance / max-Sharpe optimization
    method_mu = "hist"
    method_cov = "hist"
    hist = True
    model = "Classic"
    rm = "MV"
    obj = "Sharpe"
    rf = 0
    l = 0

    options = st.multiselect("Select Stocks For Portfolio", stocks)

    if st.button("Generate Portfolio"):
        data = yf.download(options, start=start, end=end)
        returns = data["Adj Close"].pct_change().dropna()

        port = rp.Portfolio(returns=returns)
        port.assets_stats(method_mu=method_mu, method_cov=method_cov)
        w = port.optimization(model=model, rm=rm, obj=obj, rf=rf, l=l, hist=hist)

        # saving w to session state to be accessed in other sidebar options
        st.session_state['w'] = w

        st.write("Asset Weights:")
        st.write(w)

        fig, ax = plt.subplots()
        rp.plot_pie(w=w, title="Optimal Portfolio", others=0.05, cmap="tab20", ax=ax)

        st.pyplot(fig)

        frontier = port.efficient_frontier(model=model, rm=rm, points=50, rf=rf, hist=hist)
        fig, ax = plt.subplots()
        rp.plot_frontier(frontier, mu=port.mu, cov=port.cov, returns=returns, rm=rm, rf=rf, cmap="viridis", w=w, ax=ax)
        st.pyplot(fig)

    if st.button("Compare Your Portfolio to $SPY") and 'w' in st.session_state:

        w = st.session_state['w']

        weights_dict = {}

        # iterate through each row in the DataFrame
        for ticker, row in w.iterrows():

            # add the ticker and its weight to the dictionary
            weights_dict[ticker] = row['weights']

        rounded_weights = {ticker: round(weight, 2) for ticker, weight in weights_dict.items()}

        portfolio = rounded_weights

        # historical data for the stocks in the portfolio and SPY for benchmarking
        start_date = "2015-01-01"
        end_date = "2024-01-01"
        stock_symbols = list(portfolio.keys()) + ['SPY']
        data = yf.download(stock_symbols, start=start_date, end=end_date)['Adj Close']

        # value of each position over time: portfolio weight times the stock's price growth since the start date
        portfolio_values = pd.DataFrame()
        for stock, weight in portfolio.items():
            portfolio_values[stock] = weight * data[stock] / data[stock].iloc[0]

        # sum the individual position values to get the total portfolio value
        portfolio_values['Total Value'] = portfolio_values.sum(axis=1)

        # normalize the total portfolio value and SPY to start at the same point for comparison
        normalized_portfolio = portfolio_values['Total Value'] / portfolio_values['Total Value'].iloc[0]
        normalized_spy = data['SPY'] / data['SPY'].iloc[0]

        # plot
        fig = go.Figure()
        fig.add_trace(go.Scatter(x=normalized_portfolio.index, y=normalized_portfolio*100, mode='lines', name='My Portfolio'))
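        # overlay the SPY benchmark, indexed to the same starting value, for a like-for-like comparison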
        fig.add_trace(go.Scatter(x=normalized_spy.index, y=normalized_spy*100, mode='lines', name='SPY Benchmark'))

        # update layout (both series are indexed to 100 at the start date)
        fig.update_layout(
            title='Portfolio Performance vs SPY',
            xaxis_title='Date',
            yaxis_title='Value (start = 100)',
            legend_title='Legend',
            template='plotly_white'
        )

        st.plotly_chart(fig, use_container_width=True)

if choice == "Next Steps":
    st.subheader("Navigate to this [jupyter notebook](https://colab.research.google.com/drive/11NqRw2AIDS8mZ-kjCbjTc36-T_7tnR34#scrollTo=zLIBdzWjdqq3) for a more in-depth look at quantitative finance in Python!")

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
pycaret==3.3.2
streamlit==1.37.1
neuralprophet==0.9.0
Riskfolio-Lib==6.1.1
QuantStats==0.0.62
yfinance==0.2.41
ydata-profiling==4.9.0
streamlit-pandas-profiling==0.1.3
--------------------------------------------------------------------------------