├── .gitignore ├── 0. Data Prep └── data_prep.py ├── 1. Supervised Learning Models ├── .DS_Store ├── 1. linear_regression.py ├── 2. logistic_regression_model.py ├── 3. naive_bayes_model.py ├── 4. random_forest_model.py ├── README.md ├── linear_regression_summary_with_explanation.png ├── logistic_regression_summary_with_explanation.png ├── naive_bayes_summary_with_explanation.png └── random_forest_summary_with_explanation.png ├── 2. Unsupervised Learning Models ├── .DS_Store ├── 1. clustering.py ├── 2. dimensionality_reduction.py ├── PCA_financial_data_with_full_explanation.png ├── README.md └── kmeans_financial_data_with_explanation.png ├── 3. Deep Learning Models ├── .DS_Store ├── Anomaly_Detection_Using_Autoencoder.png ├── Apple_Stock_Price_Prediction.png ├── Financial_News_Sentiment_Analysis.png ├── GAN_Financial_Simulation.png ├── README.md ├── supervised_deep_learning_models │ ├── .DS_Store │ ├── 1. recurrent_neural_network_RNN_lstm.py │ └── 2. convolutional_neural_networks_(CNNs).py └── unsupervised_deep_learning_models │ ├── .DS_Store │ ├── 3. autoencoders.py │ └── 4. generative_adversarial_networks_(GANs).py ├── 4. Reinforcement Learning Models ├── Q_Learning_Stock_Trading_YFinance.png ├── README.md └── q_learning.py ├── 5. ML Applications In Finance ├── .DS_Store ├── 01. Risk Management ✅ │ ├── .DS_Store │ ├── 1. credit_scoring✅.py │ ├── 2. value-at-risk_modeling✅.py │ ├── 3. fraud_detection✅.py │ └── 4. operational_risk_modeling✅.py ├── 02. Asset Management ✅ │ ├── .DS_Store │ ├── 1. portfolio_optimization✅.py │ ├── 2. algorithmic_trading✅.py │ ├── 3. index_tracking✅.py │ ├── 4. pairs_trading✅.py │ └── requirements.txt ├── 03. Market Analysis And Prediction │ ├── .DS_Store │ ├── 1. price_forecasting.py │ ├── 2. sentiment_analysis.py │ ├── 3. option_pricing.py │ └── 4. order_flow_prediction.py ├── 04. Customer Service │ ├── .DS_Store │ ├── 1. chatbots.py │ ├── 2. personal_finance_management.py │ └── 3. customer_segmentation.py ├── 05. Compliance and Regulatory │ ├── .DS_Store │ ├── 1. anti_money_laundering_AML.py │ ├── 2. regulatory_reporting_automation.py │ └── 3. insider_trading_detection.py ├── 06. Real Estate │ ├── .DS_Store │ ├── 1. property_valuation.py │ └── 2. investment_analysis.py ├── 07. Supply Chain Finance │ └── supply_chain_finance.py ├── 08. Invoice Management │ └── invoice_management.py ├── 09. Cash Management │ └── cash_management.py ├── 10. Decentralized Finance (DEFI) │ ├── 1.yield_farming_optimizer.py │ └── 2.smart_contract_auditor.py ├── 11. Environmental Social And Governance Investing (ESG) │ ├── 1.sustainability_analytics.py │ └── 2.impact_measurement.py ├── 12. Behavioural Economics │ ├── 1.nudges.py │ └── 2.investor_sentiment_models.py ├── 13. Blockchain And Cryptocurrency │ └── cryptocurrency_price_prediction.py ├── 14. Explainable AI For Finance │ └── model_interpretability.py ├── 15. Robotic Process Automation (RPA) │ └── automated_invoice_processing.py ├── 16. Textual And Alternative Data For Finance │ ├── news_analytics.py │ └── reddit_sentiment_and_market_trends.py ├── 17. Fundamental Anaysis │ ├── automated_10kand10Q_parser.py │ └── financial_ratios_dashboard.py ├── 18. Satellite Image Analysis For Finance │ ├── agricultural_yield_prediction.py │ ├── disaster_impact_assessment.py │ ├── natural_resource_exploration.py │ ├── real_estate_development_monitoring.py │ ├── retail_traffic_analysis.py │ ├── shipping_activity.py │ └── tech_stack.txt ├── 19. 
Venture Capital │ ├── cap_table_simulation.py │ ├── investment_thesis_generator.py │ ├── portfolio_monitoring.py │ └── startup_scouting_dashboard.py ├── 20. Private Equity │ ├── buyout_model_automation.py │ ├── deal_sourcing.py │ ├── due_dilligence_automation.py │ ├── esg_integration.py │ └── post_acquisition_value_creation.py ├── 21. VC & PE General Tools │ ├── investor_matching.py │ ├── sector_trend_analysis.py │ ├── sentiment_analysis_for_foundersandexecutives.py │ └── valuation_multiples_benchmarking.py ├── 22. Investment Banking │ ├── .DS_Store │ ├── 1. mergers_and_acquisitions_M&A │ │ ├── .DS_Store │ │ ├── deal_comparator.py │ │ ├── manda_target_screening.py │ │ └── synergy_estimator.py │ ├── 2. valuation_and_financial_modeling │ │ ├── automated_dcf_model.py │ │ ├── capital_structure_optimizer.py │ │ ├── comparable_company_analysis.py │ │ └── wacc_calculator.py │ ├── 3. risk_management │ │ ├── credit_risk_assessment.py │ │ └── foreign_exchange_risk_management.py │ ├── 4. ipo_process │ │ ├── ipo_valuation_model.py │ │ └── roadshow_presentation_generator.py │ └── 5. client_and_market_analysis │ │ ├── client_relationship_management_CRM.py │ │ ├── market_sentiment_analysis.py │ │ └── pitchbook_automation.py ├── 23. trading │ ├── algorithmic_trading_bot.py │ ├── market_maker_simulator.py │ └── orderbook_visualizer.py ├── 24. Portfolio Management │ ├── portfolio_optimizer.py │ ├── risk_parity_portfolio.py │ └── tax_efficient_portfolio_rebalancer.py ├── 25. Asset Management │ ├── alpha_beta_analysis.py │ ├── etf_tracker.py │ └── performance_attribution_tool.py ├── 26. Wealth Management │ ├── estate_planning_tool.py │ ├── retirement_planner.py │ └── robo_adviser_prototype.py ├── 27. Multi Asset Risk Model │ └── multi_asset_risk_model.py └── 28. Personal Financial Management App │ └── personal_financial_management.py ├── Brewfile ├── README.md ├── main.py └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | # macOS system files 2 | .DS_Store 3 | 4 | # Python generated files and directories 5 | __pycache__/ 6 | *.pyc 7 | *.pyo 8 | *.pyd 9 | .Python 10 | build/ 11 | dist/ 12 | *.egg-info/ 13 | .eggs/ 14 | sdist/ 15 | develop-eggs/ 16 | .idea/ 17 | .vscode/ 18 | *.swp 19 | -------------------------------------------------------------------------------- /0. Data Prep/data_prep.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import yfinance as yf 3 | 4 | def fetch_data(ticker, start_date, end_date): 5 | return yf.download(ticker, start=start_date, end=end_date)['Close'].values 6 | 7 | def create_dataset(data, look_back=1): 8 | X, Y = [], [] 9 | for i in range(len(data) - look_back - 1): 10 | a = data[i:(i + look_back)] 11 | X.append(a) 12 | Y.append(data[i + look_back]) 13 | return np.array(X), np.array(Y) 14 | -------------------------------------------------------------------------------- /1. Supervised Learning Models/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manujajay/machinelearning4finance/7a315e218c211633e28f6ed3de3bdde1de5e09d3/1. Supervised Learning Models/.DS_Store -------------------------------------------------------------------------------- /1. Supervised Learning Models/1. 
linear_regression.py: -------------------------------------------------------------------------------- 1 | import yfinance as yf 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | from sklearn.linear_model import LinearRegression 5 | from sklearn.model_selection import train_test_split 6 | from sklearn.metrics import mean_squared_error, r2_score 7 | 8 | def fetch_data(ticker, start_date, end_date): 9 | return yf.download(ticker, start=start_date, end=end_date)['Close'].values 10 | 11 | def create_dataset(data, look_back=1): 12 | X, Y = [], [] 13 | for i in range(len(data) - look_back - 1): 14 | X.append(data[i:(i + look_back)]) 15 | Y.append(data[i + look_back]) 16 | return np.array(X), np.array(Y) 17 | 18 | if __name__ == "__main__": 19 | # Fetch and prepare data 20 | data = fetch_data('AAPL', '2010-01-01', '2023-01-01') 21 | X, Y = create_dataset(data) 22 | X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42) 23 | 24 | # Train model 25 | model = LinearRegression() 26 | model.fit(X_train, Y_train) 27 | pred = model.predict(X_test) 28 | 29 | # Evaluate model 30 | mse = mean_squared_error(Y_test, pred) 31 | r2 = r2_score(Y_test, pred) 32 | 33 | # Create a figure and a grid of subplots 34 | fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(16, 12)) 35 | 36 | # Plotting the actual vs predicted values on first subplot 37 | ax[0].scatter(range(len(Y_test)), Y_test, c='g', label='Actual') 38 | ax[0].scatter(range(len(pred)), pred, c='r', label='Predicted') 39 | ax[0].set_xlabel('Index in Test Set') 40 | ax[0].set_ylabel('Stock Price (USD)') 41 | ax[0].legend() 42 | ax[0].set_title('Linear Regression Model: Actual vs Predicted Stock Prices') 43 | 44 | # Annotations and equations on the second subplot 45 | ax[1].axis('off') 46 | ax[1].text(0.1, 0.8, f'Model: Linear Regression', fontsize=12) 47 | ax[1].text(0.1, 0.7, f'Equation: Y = {model.coef_[0]:.2f} * X + {model.intercept_:.2f}', fontsize=12) 48 | ax[1].text(0.1, 0.6, f'Mean Squared Error: {mse:.2f}', fontsize=12) 49 | ax[1].text(0.1, 0.5, f'R^2 Score: {r2:.2f}', fontsize=12) 50 | 51 | # Explanation 52 | explanation = ( 53 | "Explanation:\n" 54 | "Linear Regression tries to fit a linear equation to the data points.\n" 55 | "In this case, we are trying to predict the future stock price of Apple Inc.\n" 56 | "The model takes the stock price of a previous day (X) and predicts the stock price\n" 57 | "for the next day (Y) using the equation Y = Coefficient * X + Intercept.\n" 58 | "MSE and R^2 Score are metrics to evaluate the model's performance." 59 | ) 60 | ax[1].text(0.1, 0.1, explanation, fontsize=12) 61 | 62 | # Save plot as a PNG file 63 | plt.savefig('linear_regression_summary_with_explanation.png') 64 | 65 | # Show the plot 66 | plt.show() 67 | -------------------------------------------------------------------------------- /1. Supervised Learning Models/2. 
logistic_regression_model.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yfinance as yf 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | import seaborn as sns 6 | from sklearn.linear_model import LogisticRegression 7 | from sklearn.model_selection import train_test_split 8 | from sklearn.metrics import accuracy_score, confusion_matrix 9 | from fredapi import Fred 10 | 11 | def fetch_data(ticker, start_date, end_date): 12 | return yf.download(ticker, start=start_date, end=end_date)['Close'].values 13 | 14 | def fetch_fred_data(api_key, series_id, start_date, end_date): 15 | fred = Fred(api_key=api_key) 16 | return fred.get_series(series_id, start_date, end_date).values 17 | 18 | def create_dataset(stock_data, sp500_data, interest_rates, gdp_growth, unemployment, look_back=1): 19 | X, Y = [], [] 20 | for i in range(len(stock_data) - look_back - 1): 21 | features = list(stock_data[i:(i + look_back)]) + [sp500_data[i], interest_rates[i], gdp_growth[i], unemployment[i]] 22 | X.append(features) 23 | Y.append(1 if stock_data[i + look_back] > stock_data[i + look_back - 1] else 0) 24 | return np.array(X), np.array(Y) 25 | 26 | if __name__ == "__main__": 27 | api_key = os.getenv('FRED_API_KEY') 28 | if api_key is None: 29 | print("Please set your FRED_API_KEY as an environment variable.") 30 | exit() 31 | 32 | # Fetch and prepare data 33 | stock_data = fetch_data('AAPL', '2010-01-01', '2023-01-01') 34 | sp500_data = fetch_data('^GSPC', '2010-01-01', '2023-01-01') 35 | interest_rates = fetch_fred_data(api_key, 'TB3MS', '2010-01-01', '2023-01-01') 36 | gdp_growth = fetch_fred_data(api_key, 'A191RL1Q225SBEA', '2010-01-01', '2023-01-01') 37 | unemployment = fetch_fred_data(api_key, 'UNRATE', '2010-01-01', '2023-01-01') 38 | 39 | # Truncate data to the smallest length among all series 40 | min_len = min(len(stock_data), len(sp500_data), len(interest_rates), len(gdp_growth), len(unemployment)) 41 | stock_data, sp500_data, interest_rates, gdp_growth, unemployment = [arr[:min_len] for arr in [stock_data, sp500_data, interest_rates, gdp_growth, unemployment]] 42 | 43 | X, Y = create_dataset(stock_data, sp500_data, interest_rates, gdp_growth, unemployment) 44 | X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42) 45 | 46 | # Train model 47 | model = LogisticRegression() 48 | model.fit(X_train, Y_train) 49 | pred = model.predict(X_test) 50 | 51 | # Evaluate model 52 | accuracy = accuracy_score(Y_test, pred) 53 | cm = confusion_matrix(Y_test, pred) 54 | 55 | # Create figure 56 | fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6)) 57 | 58 | # Plot confusion matrix using Seaborn 59 | sns.heatmap(cm, annot=True, fmt='g', ax=ax1, cmap='Blues') 60 | ax1.set_xlabel('Predicted labels') 61 | ax1.set_ylabel('True labels') 62 | ax1.set_title('Confusion Matrix') 63 | 64 | # Explanation 65 | explanation = ( 66 | f"Model: Logistic Regression\n" 67 | f"Accuracy: {accuracy:.2f}\n\n" 68 | "Predictors:\n" 69 | "- Previous day's stock price\n" 70 | "- S&P 500 index\n" 71 | "- Interest rates\n" 72 | "- GDP growth rates\n" 73 | "- Unemployment rates\n\n" 74 | "Explanation:\n" 75 | "Logistic Regression is a classification algorithm.\n" 76 | "It uses multiple predictors to estimate the probability of the stock price going up (1) or down (0) the next day.\n" 77 | "Accuracy is the metric used to evaluate the model's performance.\n" 78 | "The confusion matrix provides a summary of the number of correct and incorrect 
predictions." 79 | ) 80 | 81 | ax2.axis('off') 82 | ax2.text(0.1, 0.1, explanation, fontsize=12) 83 | 84 | plt.tight_layout() 85 | plt.savefig('logistic_regression_summary_with_explanation.png') 86 | plt.show() 87 | 88 | -------------------------------------------------------------------------------- /1. Supervised Learning Models/3. naive_bayes_model.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yfinance as yf 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | import seaborn as sns 6 | from sklearn.naive_bayes import GaussianNB 7 | from sklearn.model_selection import train_test_split 8 | from sklearn.metrics import accuracy_score, confusion_matrix 9 | from fredapi import Fred 10 | 11 | def fetch_data(ticker, start_date, end_date): 12 | """Fetch stock or index data using yfinance.""" 13 | return yf.download(ticker, start=start_date, end=end_date)['Close'].values 14 | 15 | def fetch_fred_data(api_key, series_id, start_date, end_date): 16 | """Fetch macroeconomic data using FRED API.""" 17 | fred = Fred(api_key=api_key) 18 | return fred.get_series(series_id, start_date, end_date).values 19 | 20 | def create_dataset(stock_data, sp500_data, interest_rates, gdp_growth, unemployment, look_back=1): 21 | """Create dataset combining stock data and macroeconomic indicators.""" 22 | X, Y = [], [] 23 | for i in range(len(stock_data) - look_back - 1): 24 | features = list(stock_data[i:(i + look_back)]) + [sp500_data[i], interest_rates[i], gdp_growth[i], unemployment[i]] 25 | X.append(features) 26 | Y.append(1 if stock_data[i + look_back] > stock_data[i + look_back - 1] else 0) 27 | return np.array(X), np.array(Y) 28 | 29 | if __name__ == "__main__": 30 | # Get FRED API Key from environment variable 31 | api_key = os.getenv('FRED_API_KEY') 32 | if api_key is None: 33 | print("Please set your FRED_API_KEY as an environment variable.") 34 | exit() 35 | 36 | # Fetch and prepare various data 37 | stock_data = fetch_data('AAPL', '2010-01-01', '2023-01-01') 38 | sp500_data = fetch_data('^GSPC', '2010-01-01', '2023-01-01') 39 | interest_rates = fetch_fred_data(api_key, 'TB3MS', '2010-01-01', '2023-01-01') 40 | gdp_growth = fetch_fred_data(api_key, 'A191RL1Q225SBEA', '2010-01-01', '2023-01-01') 41 | unemployment = fetch_fred_data(api_key, 'UNRATE', '2010-01-01', '2023-01-01') 42 | 43 | # Make sure all data series are of the same length 44 | min_len = min(len(stock_data), len(sp500_data), len(interest_rates), len(gdp_growth), len(unemployment)) 45 | stock_data, sp500_data, interest_rates, gdp_growth, unemployment = stock_data[:min_len], sp500_data[:min_len], interest_rates[:min_len], gdp_growth[:min_len], unemployment[:min_len] 46 | 47 | # Create dataset 48 | X, Y = create_dataset(stock_data, sp500_data, interest_rates, gdp_growth, unemployment) 49 | 50 | # Split dataset 51 | X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42) 52 | 53 | # Train model 54 | model = GaussianNB() 55 | model.fit(X_train, Y_train) 56 | pred = model.predict(X_test) 57 | 58 | # Evaluate model 59 | accuracy = accuracy_score(Y_test, pred) 60 | cm = confusion_matrix(Y_test, pred) 61 | 62 | # Create figure for visualization 63 | fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6)) 64 | 65 | # Plot confusion matrix 66 | sns.heatmap(cm, annot=True, fmt='g', ax=ax1, cmap='Blues') 67 | ax1.set_xlabel('Predicted labels') 68 | ax1.set_ylabel('True labels') 69 | ax1.set_title('Confusion Matrix') 70 | 71 | # Add explanation text 72 | 
explanation = ( 73 | f"Model: Gaussian Naive Bayes\n" 74 | f"Accuracy: {accuracy:.2f}\n\n" 75 | "Predictors:\n" 76 | "- Previous day's stock price\n" 77 | "- S&P 500 index\n" 78 | "- Interest rates\n" 79 | "- GDP growth rates\n" 80 | "- Unemployment rates\n\n" 81 | "Explanation:\n" 82 | "Naive Bayes is a probabilistic classification algorithm.\n" 83 | "In this context, it predicts whether the stock price will go up (1) or down (0) the next day based on Bayes' theorem.\n" 84 | "Accuracy is the metric used to evaluate the model's performance.\n" 85 | "The confusion matrix provides a summary of the number of correct and incorrect predictions." 86 | ) 87 | ax2.axis('off') 88 | ax2.text(0.1, 0.1, explanation, fontsize=12) 89 | 90 | # Save and show plot 91 | plt.tight_layout() 92 | plt.savefig('naive_bayes_summary_with_explanation.png') 93 | plt.show() 94 | 95 | -------------------------------------------------------------------------------- /1. Supervised Learning Models/4. random_forest_model.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yfinance as yf 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | import seaborn as sns 6 | from sklearn.ensemble import RandomForestClassifier 7 | from sklearn.model_selection import train_test_split 8 | from sklearn.metrics import accuracy_score, confusion_matrix 9 | from fredapi import Fred 10 | from textwrap import wrap 11 | 12 | # Fetch stock data 13 | def fetch_data(ticker, start_date, end_date): 14 | return yf.download(ticker, start=start_date, end=end_date)['Close'].values 15 | 16 | # Fetch economic indicators using the FRED API 17 | def fetch_fred_data(api_key, series_id, start_date, end_date): 18 | fred = Fred(api_key=api_key) 19 | return fred.get_series(series_id, start_date, end_date).values 20 | 21 | # Create dataset with predictors and target 22 | def create_dataset(stock_data, sp500_data, interest_rates, gdp_growth, unemployment, look_back=1): 23 | X, Y = [], [] 24 | for i in range(len(stock_data) - look_back - 1): 25 | features = list(stock_data[i:(i + look_back)]) + [sp500_data[i], interest_rates[i], gdp_growth[i], unemployment[i]] 26 | X.append(features) 27 | Y.append(1 if stock_data[i + look_back] > stock_data[i + look_back - 1] else 0) 28 | return np.array(X), np.array(Y) 29 | 30 | if __name__ == "__main__": 31 | # Get FRED API key from environment variable 32 | api_key = os.getenv('FRED_API_KEY') 33 | if api_key is None: 34 | print("Please set your FRED_API_KEY as an environment variable.") 35 | exit() 36 | 37 | # Fetch data 38 | stock_data = fetch_data('AAPL', '2010-01-01', '2023-01-01') 39 | sp500_data = fetch_data('^GSPC', '2010-01-01', '2023-01-01') 40 | interest_rates = fetch_fred_data(api_key, 'TB3MS', '2010-01-01', '2023-01-01') 41 | gdp_growth = fetch_fred_data(api_key, 'A191RL1Q225SBEA', '2010-01-01', '2023-01-01') 42 | unemployment = fetch_fred_data(api_key, 'UNRATE', '2010-01-01', '2023-01-01') 43 | 44 | # After fetching the data, ensure they all have the same length 45 | min_len = min(len(stock_data), len(sp500_data), len(interest_rates), len(gdp_growth), len(unemployment)) 46 | 47 | stock_data = stock_data[:min_len] 48 | sp500_data = sp500_data[:min_len] 49 | interest_rates = interest_rates[:min_len] 50 | gdp_growth = gdp_growth[:min_len] 51 | unemployment = unemployment[:min_len] 52 | 53 | 54 | # Create dataset 55 | X, Y = create_dataset(stock_data, sp500_data, interest_rates, gdp_growth, unemployment) 56 | X_train, X_test, Y_train, Y_test = 
train_test_split(X, Y, test_size=0.2, random_state=42) 57 | 58 | # Train Random Forest model 59 | model = RandomForestClassifier(n_estimators=100) 60 | model.fit(X_train, Y_train) 61 | pred = model.predict(X_test) 62 | 63 | # Evaluate the model 64 | accuracy = accuracy_score(Y_test, pred) 65 | cm = confusion_matrix(Y_test, pred) 66 | 67 | # Create visualization 68 | fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6)) 69 | 70 | # Plot confusion matrix 71 | sns.heatmap(cm, annot=True, fmt='g', ax=ax1, cmap='Blues') 72 | ax1.set_xlabel('Predicted labels') 73 | ax1.set_ylabel('True labels') 74 | ax1.set_title('Confusion Matrix') 75 | 76 | # Explanation text 77 | explanation = ( 78 | f"Model: Random Forest\n" 79 | f"Accuracy: {accuracy:.2f}\n\n" 80 | "Predictors:\n" 81 | "- Previous day's stock price\n" 82 | "- S&P 500 index\n" 83 | "- Interest rates\n" 84 | "- GDP growth rates\n" 85 | "- Unemployment rates\n\n" 86 | "Explanation:\n" 87 | "The Random Forest model is an ensemble learning method primarily used for classification and regression tasks. " 88 | "It employs multiple decision trees during training and outputs the mode of classes (classification) or mean prediction (regression) of the individual trees for a more robust and accurate prediction.\n" 89 | "1. Bagging: Random Forest uses 'Bootstrap Aggregating' or Bagging, where random subsets of the training data are chosen with replacement to train each decision tree. " 90 | "This diversity ensures that each decision tree is different and prevents overfitting.\n" 91 | "2. Decision Trees: Each subset of data constructs a decision tree. Unlike a single decision tree that uses all features to make a decision at each node, " 92 | "Random Forest selects a random subset of features for every node split. This randomness contributes to 'decorrelating' the trees, thereby boosting the model's performance.\n" 93 | "3. Features: In our case, features like the previous day's stock price and S&P 500 index could be dominant factors in market movements. " 94 | "Interest rates influence investment sentiment, GDP growth rates show economic health, and unemployment rates can reflect consumer spending, all affecting the stock price. " 95 | "Random Forest takes all these features into account for each tree.\n" 96 | "4. Prediction: Once all trees are built, the model makes a prediction for a new data point by letting each tree in the ensemble 'vote' for a class. " 97 | "In your binary classification task (stock price going up as '1' or down as '0'), the majority vote will be the final output of the model.\n" 98 | "5. Majority Voting: The Random Forest uses 'majority voting' to finalize the prediction. " 99 | "The class that receives the most votes from all the trees in the forest becomes the model's prediction.\n" 100 | "6. Equation for Classification: The final prediction, \(y\), is determined as \(y = \\mathrm{mode}(y_1, y_2, \\ldots, y_N)\), where \(N\) is the number of trees in the forest.\n" 101 | "By aggregating the insights and 'votes' from multiple decision trees, Random Forest provides a more balanced and nuanced understanding of the complex relationships among the predictors." 
102 | ) 103 | 104 | # Wrap the text to make it fit into the figure neatly 105 | wrapped_text = "\n".join(wrap(explanation, width=80)) # 80 characters per line, adjust as needed 106 | 107 | ax2.axis('off') 108 | ax2.text(0.01, 0.99, wrapped_text, fontsize=9, va='top') # Aligns text at top left corner with a fontsize of 9 109 | 110 | plt.tight_layout() 111 | plt.savefig('random_forest_summary_with_explanation.png') 112 | plt.show() -------------------------------------------------------------------------------- /1. Supervised Learning Models/README.md: -------------------------------------------------------------------------------- 1 | ![alt text](linear_regression_summary_with_explanation.png) 2 | ![alt text](logistic_regression_summary_with_explanation.png) 3 | ![alt text](naive_bayes_summary_with_explanation.png) 4 | ![alt text](random_forest_summary_with_explanation.png) -------------------------------------------------------------------------------- /1. Supervised Learning Models/linear_regression_summary_with_explanation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manujajay/machinelearning4finance/7a315e218c211633e28f6ed3de3bdde1de5e09d3/1. Supervised Learning Models/linear_regression_summary_with_explanation.png -------------------------------------------------------------------------------- /1. Supervised Learning Models/logistic_regression_summary_with_explanation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manujajay/machinelearning4finance/7a315e218c211633e28f6ed3de3bdde1de5e09d3/1. Supervised Learning Models/logistic_regression_summary_with_explanation.png -------------------------------------------------------------------------------- /1. Supervised Learning Models/naive_bayes_summary_with_explanation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manujajay/machinelearning4finance/7a315e218c211633e28f6ed3de3bdde1de5e09d3/1. Supervised Learning Models/naive_bayes_summary_with_explanation.png -------------------------------------------------------------------------------- /1. Supervised Learning Models/random_forest_summary_with_explanation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manujajay/machinelearning4finance/7a315e218c211633e28f6ed3de3bdde1de5e09d3/1. Supervised Learning Models/random_forest_summary_with_explanation.png -------------------------------------------------------------------------------- /2. Unsupervised Learning Models/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manujajay/machinelearning4finance/7a315e218c211633e28f6ed3de3bdde1de5e09d3/2. Unsupervised Learning Models/.DS_Store -------------------------------------------------------------------------------- /2. Unsupervised Learning Models/1. 
clustering.py: -------------------------------------------------------------------------------- 1 | # k-means clustering 2 | import yfinance as yf 3 | import pandas_datareader.data as web 4 | import pandas as pd 5 | from sklearn.cluster import KMeans 6 | import matplotlib.pyplot as plt 7 | from textwrap import wrap 8 | 9 | # Download stock data from Yahoo Finance 10 | tickers = ['AAPL', 'GOOGL', 'MSFT', 'AMZN', 'TSLA'] 11 | stock_data = yf.download(tickers, start='2020-01-01', end='2021-01-01')['Adj Close'] 12 | 13 | # Download S&P 500 data from FRED 14 | sp500 = web.DataReader('SP500', 'fred', '2020-01-01', '2021-01-01') 15 | 16 | # Combine stock and S&P 500 data 17 | data = pd.concat([stock_data, sp500], axis=1).dropna() 18 | data = data.pct_change().dropna() # Calculate daily returns 19 | 20 | # K-Means clustering 21 | kmeans = KMeans(n_clusters=3) 22 | kmeans.fit(data) 23 | labels = kmeans.labels_ 24 | 25 | # Create plot with subplots 26 | fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6)) 27 | 28 | # Main plot on ax1 (select columns by name: yfinance orders ticker columns alphabetically, so positional indexing would put AMZN, not GOOGL, on the y-axis) 29 | ax1.scatter(data['AAPL'], data['GOOGL'], c=labels, cmap='viridis') 30 | ax1.set_title('K-Means Clustering of Stock Data and S&P 500') 31 | ax1.set_xlabel('AAPL Daily Returns') 32 | ax1.set_ylabel('GOOGL Daily Returns') 33 | 34 | # Explanation on ax2 35 | explanation = ( 36 | "Algorithm: K-Means Clustering\n" 37 | "Number of Clusters: 3\n" 38 | "Data: Stock prices and S&P 500 index\n\n" 39 | "Explanation:\n" 40 | "K-means partitions the financial data into 'K' clusters based on daily returns. " 41 | "While it's not generally used for prediction, it provides valuable insights into data structure. " 42 | "These insights can be instrumental for:\n\n" 43 | "- Portfolio Diversification: Identifying statistically similar assets for diversification.\n" 44 | "- Risk Management: Recognizing asset groups for better hedging strategies.\n" 45 | "- Market Regime Identification: Understanding different market states for dynamic trading." 46 | ) 47 | wrapped_explanation = "\n".join(wrap(explanation, 50)) # Wraps the text at 50 characters 48 | 49 | ax2.axis('off') 50 | ax2.text(0.01, 0.99, wrapped_explanation, fontsize=10, va='top', wrap=True) # Aligns text at top left corner with a fontsize of 10 51 | 52 | # Save and show plot 53 | plt.tight_layout() 54 | plt.savefig('kmeans_financial_data_with_explanation.png') 55 | plt.show() 56 | -------------------------------------------------------------------------------- /2. Unsupervised Learning Models/2. 
dimensionality_reduction.py: -------------------------------------------------------------------------------- 1 | # Import necessary libraries 2 | import yfinance as yf 3 | import pandas_datareader.data as web 4 | import pandas as pd 5 | import numpy as np 6 | from sklearn.decomposition import PCA 7 | import matplotlib.pyplot as plt 8 | from textwrap import wrap 9 | 10 | # Download stock data from Yahoo Finance 11 | tickers = ['AAPL', 'GOOGL', 'MSFT', 'AMZN', 'TSLA'] 12 | stock_data = yf.download(tickers, start='2020-01-01', end='2021-01-01')['Adj Close'] 13 | 14 | # Download S&P 500 data from FRED 15 | sp500 = web.DataReader('SP500', 'fred', '2020-01-01', '2021-01-01') 16 | 17 | # Combine stock and S&P 500 data 18 | data = pd.concat([stock_data, sp500], axis=1).dropna() 19 | data = data.pct_change().dropna() # Calculate daily returns 20 | 21 | # Apply PCA 22 | pca = PCA(n_components=2) 23 | principal_components = pca.fit_transform(data) 24 | 25 | # Analyze the components 26 | components_df = pd.DataFrame(pca.components_, columns=data.columns, index=[f'PC{i+1}' for i in range(2)]) 27 | 28 | # Create plot with subplots 29 | fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6)) 30 | 31 | # Main plot on ax1 32 | ax1.scatter(principal_components[:, 0], principal_components[:, 1], c='blue') 33 | ax1.set_title('PCA of Stock Data and S&P 500') 34 | ax1.set_xlabel('Principal Component 1') 35 | ax1.set_ylabel('Principal Component 2') 36 | 37 | # Turn off axis for ax2 38 | ax2.axis('off') 39 | 40 | # Initial explanation 41 | initial_explanation = ( 42 | "Algorithm: Principal Component Analysis (PCA)\n" 43 | "Components: 2\n" 44 | "Data: Stock prices and S&P 500 index\n\n" 45 | "PCA reduces the dimensionality of the data by finding new variables (Principal Components) that maximize variance.\n" 46 | "This is useful for:\n" 47 | "- Data Visualization: Reducing dimensions aids in visualizing complex data.\n" 48 | "- Risk Modeling: Identifying primary risk factors in a portfolio.\n" 49 | "- Factor Analysis: Understanding the underlying factors affecting asset prices.\n" 50 | ) 51 | 52 | # New section explaining the drivers of the components (use absolute loadings: a large negative loading is just as influential as a large positive one) 53 | new_section = ( 54 | f"\nPrincipal Component 1 is most influenced by {components_df.loc['PC1'].abs().idxmax()}.\n" 55 | f"Principal Component 2 is most influenced by {components_df.loc['PC2'].abs().idxmax()}.\n" 56 | ) 57 | 58 | # Combine initial explanation and new section 59 | full_explanation = initial_explanation + new_section 60 | 61 | # Additional useful explanation 62 | why_useful = ( 63 | "\nUsefulness:\n" 64 | "1. Portfolio Optimization: Identify key drivers of asset returns.\n" 65 | "2. Risk Management: Uncover main risk factors.\n" 66 | "3. Trading Strategies: Develop strategies based on hidden factors.\n" 67 | "4. Data Visualization: Easier interpretation of high-dimensional data.\n" 68 | "5. Correlation Structure: Simplify data complexity.\n" 69 | "6. Market Regime Identification: Adapt trading strategies dynamically."
70 | ) 71 | 72 | # Combine the original explanation, the 'why useful' section, and the new section 73 | full_explanation += why_useful 74 | 75 | wrapped_full_explanation = "\n".join(wrap(full_explanation, 50)) 76 | ax2.text(0.01, 0.99, wrapped_full_explanation, fontsize=10, va='top', wrap=True) 77 | 78 | # Save and show plot 79 | plt.tight_layout() 80 | plt.savefig('PCA_financial_data_with_full_explanation.png') 81 | plt.show() 82 | 83 | # Output the components for further analysis 84 | print("Principal Component Analysis") 85 | print(components_df) 86 | -------------------------------------------------------------------------------- /2. Unsupervised Learning Models/PCA_financial_data_with_full_explanation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manujajay/machinelearning4finance/7a315e218c211633e28f6ed3de3bdde1de5e09d3/2. Unsupervised Learning Models/PCA_financial_data_with_full_explanation.png -------------------------------------------------------------------------------- /2. Unsupervised Learning Models/README.md: -------------------------------------------------------------------------------- 1 | ![alt text](kmeans_financial_data_with_explanation.png) 2 | ![alt text](PCA_financial_data_with_full_explanation.png) 3 | -------------------------------------------------------------------------------- /2. Unsupervised Learning Models/kmeans_financial_data_with_explanation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manujajay/machinelearning4finance/7a315e218c211633e28f6ed3de3bdde1de5e09d3/2. Unsupervised Learning Models/kmeans_financial_data_with_explanation.png -------------------------------------------------------------------------------- /3. Deep Learning Models/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manujajay/machinelearning4finance/7a315e218c211633e28f6ed3de3bdde1de5e09d3/3. Deep Learning Models/.DS_Store -------------------------------------------------------------------------------- /3. Deep Learning Models/Anomaly_Detection_Using_Autoencoder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manujajay/machinelearning4finance/7a315e218c211633e28f6ed3de3bdde1de5e09d3/3. Deep Learning Models/Anomaly_Detection_Using_Autoencoder.png -------------------------------------------------------------------------------- /3. Deep Learning Models/Apple_Stock_Price_Prediction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manujajay/machinelearning4finance/7a315e218c211633e28f6ed3de3bdde1de5e09d3/3. Deep Learning Models/Apple_Stock_Price_Prediction.png -------------------------------------------------------------------------------- /3. Deep Learning Models/Financial_News_Sentiment_Analysis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manujajay/machinelearning4finance/7a315e218c211633e28f6ed3de3bdde1de5e09d3/3. Deep Learning Models/Financial_News_Sentiment_Analysis.png -------------------------------------------------------------------------------- /3. 
Deep Learning Models/GAN_Financial_Simulation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manujajay/machinelearning4finance/7a315e218c211633e28f6ed3de3bdde1de5e09d3/3. Deep Learning Models/GAN_Financial_Simulation.png -------------------------------------------------------------------------------- /3. Deep Learning Models/README.md: -------------------------------------------------------------------------------- 1 | ![alt text](Apple_Stock_Price_Prediction.png) 2 | ![alt text](Financial_News_Sentiment_Analysis.png) 3 | ![alt text](Anomaly_Detection_Using_Autoencoder.png) 4 | ![alt text](GAN_Financial_Simulation.png) 5 | 6 | -------------------------------------------------------------------------------- /3. Deep Learning Models/supervised_deep_learning_models/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manujajay/machinelearning4finance/7a315e218c211633e28f6ed3de3bdde1de5e09d3/3. Deep Learning Models/supervised_deep_learning_models/.DS_Store -------------------------------------------------------------------------------- /3. Deep Learning Models/supervised_deep_learning_models/1. recurrent_neural_network_RNN_lstm.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Long Short-Term Memory networks (LSTMs) are a type of Recurrent Neural Network (RNN) and are typically used in the context 3 | of supervised learning, particularly for sequence prediction problems like time series forecasting, natural language processing, 4 | and more. In these applications, you usually have labeled data where the sequence input is associated with a corresponding output. 5 | 6 | That being said, LSTMs can also be used in unsupervised learning scenarios. For example, you can use LSTMs in autoencoders for 7 | sequence-to-sequence reconstruction, anomaly detection in time series data, or learning embeddings for sequences without explicit labels. 8 | ''' 9 | 10 | ''' 11 | What is an LSTM? 12 | Long Short-Term Memory (LSTM) is a type of recurrent neural network (RNN) architecture. An LSTM is designed to remember past information in sequence data and is widely used in time series analysis, natural language processing, and many other sequence-related tasks. Unlike standard feedforward neural networks, LSTMs have "memory" in the form of a cell state and hidden state, which helps them learn from the "context" or "sequence" of the inputs. 13 | 14 | How it works? 15 | Input Sequence: At each time step, the LSTM takes in an input and the previous cell state and hidden state 16 | 17 | Forget Gate: Decide what information from the cell state should be thrown away. 18 | 19 | Input Gate: Update the cell state with new information. 20 | 21 | Output Gate: Based on the cell state and the input, decide what should be the new hidden state 22 | 23 | New Cell State: Finally, calculate the new cell state 24 | 25 | Predictive Power 26 | LSTMs are particularly useful for solving problems that require learning long-term dependencies. They are less susceptible to the vanishing gradient problem, which allows them to learn from data where the important features are separated by many time steps. This makes them highly efficient for various sequence-based tasks such as time-series prediction, sequence-to-sequence mapping, and so on. 
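For reference, the gate updates sketched above can be written in standard notation (this is the generic LSTM formulation, not something specific to this script): forget gate f_t = sigmoid(W_f · [h_{t-1}, x_t] + b_f); input gate i_t = sigmoid(W_i · [h_{t-1}, x_t] + b_i); candidate state C~_t = tanh(W_C · [h_{t-1}, x_t] + b_C); new cell state C_t = f_t * C_{t-1} + i_t * C~_t; output gate o_t = sigmoid(W_o · [h_{t-1}, x_t] + b_o); and new hidden state h_t = o_t * tanh(C_t).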
27 | 28 | In finance, LSTMs can be used for predicting stock prices, forex trading, and even for algorithmic trading strategies. However, it's crucial to note that the financial markets are influenced by a multitude of factors, many of which can be non-sequential or not included in the model. So while LSTMs can capture patterns in past data efficiently, they are by no means a guarantee for high accuracy in financial predictions. 29 | 30 | By setting up a proper evaluation metric (like RMSE for regression tasks, or F1-score for classification tasks), you can get a quantitative measure of how well your LSTM model is likely to perform on unseen data. 31 | ''' 32 | import numpy as np 33 | import pandas as pd 34 | import yfinance as yf 35 | from sklearn.preprocessing import MinMaxScaler 36 | from tensorflow.keras.models import Sequential 37 | from tensorflow.keras.layers import LSTM, Dense 38 | from sklearn.metrics import mean_squared_error, mean_absolute_error 39 | from math import sqrt 40 | import matplotlib.pyplot as plt 41 | 42 | # Download the Apple stock price data 43 | data = yf.download('AAPL', start='2019-01-01', end='2021-01-01') 44 | data = data[['Close']] 45 | 46 | # Data Preprocessing 47 | scaler = MinMaxScaler(feature_range=(0, 1)) 48 | scaled_data = scaler.fit_transform(data.values) 49 | 50 | # Create a dataset for training the LSTM model 51 | train_data = scaled_data[:int(0.8 * len(scaled_data))] 52 | x_train, y_train = [], [] 53 | for i in range(60, len(train_data)): 54 | x_train.append(train_data[i-60:i, 0]) 55 | y_train.append(train_data[i, 0]) 56 | 57 | x_train, y_train = np.array(x_train), np.array(y_train) 58 | x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1)) 59 | 60 | # Debugging 61 | print(f"Total data length: {len(data)}") 62 | print(f"Training data length: {len(train_data)}") 63 | 64 | # Building and Training the LSTM Model 65 | model = Sequential() 66 | model.add(LSTM(units=50, return_sequences=True, input_shape=(x_train.shape[1], 1))) 67 | model.add(LSTM(units=50, return_sequences=False)) 68 | model.add(Dense(units=25)) 69 | model.add(Dense(units=1)) 70 | 71 | # Compile and train the model 72 | model.compile(optimizer='adam', loss='mean_squared_error') 73 | history = model.fit(x_train, y_train, batch_size=1, epochs=1) 74 | 75 | # Output the training loss 76 | print(f"Training loss: {history.history['loss'][0]}") 77 | 78 | # Testing the Model 79 | test_data = scaled_data[int(0.8 * len(scaled_data)) - 60:] 80 | x_test, y_test = [], [] 81 | for i in range(60, len(test_data)): 82 | x_test.append(test_data[i-60:i, 0]) 83 | y_test.append(test_data[i, 0]) 84 | 85 | x_test, y_test = np.array(x_test), np.array(y_test) 86 | x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1)) 87 | 88 | # Make predictions using the test set 89 | predicted_price = model.predict(x_test) 90 | predicted_price = scaler.inverse_transform(np.reshape(predicted_price, (-1, 1))) 91 | 92 | # Calculate Test Loss 93 | test_loss = model.evaluate(x_test, y_test) 94 | print(f"Test loss: {test_loss}") 95 | 96 | # Calculate Root Mean Square Error (RMSE) on the original price scale (y_test is still min-max scaled, so invert it first) 97 | rmse = sqrt(mean_squared_error(scaler.inverse_transform(y_test.reshape(-1, 1)), predicted_price)) 98 | print(f'Root Mean Square Error (RMSE): {rmse}') 99 | 100 | # Calculate Mean Absolute Error (MAE), also on the original price scale 101 | mae = mean_absolute_error(scaler.inverse_transform(y_test.reshape(-1, 1)), predicted_price) 102 | print(f'Mean Absolute Error (MAE): {mae}') 103 | 104 | # Visualizing the Results 105 | plt.figure(figsize=(16, 8)) 106 | 107 | # Plot the real stock price 108 | plt.plot(data.index, data['Close'], 
label='True Price') 109 | 110 | # Generate the index for the predicted price (the test predictions cover the tail of the series, so align them with its last dates) 111 | predicted_index = data.index[-len(predicted_price):] 112 | 113 | # Debugging: Verifying dimensions before plotting 114 | print(f"Shape of predicted_price: {predicted_price.shape}") 115 | print(f"Shape of predicted_index: {len(predicted_index)}") 116 | print(f"First few elements of predicted_index: {predicted_index[:5]}") 117 | print(f"Last few elements of predicted_index: {predicted_index[-5:]}") 118 | print(f"Length of data.index: {len(data.index)}") 119 | print(f"Length of train_data: {len(train_data)}") 120 | print(f"Length of train_data + 60: {len(train_data) + 60}") 121 | 122 | 123 | # Adjusting predicted_price to match the length of predicted_index (a no-op with the alignment above, kept as a guard) 124 | predicted_price = predicted_price[:len(predicted_index)] 125 | 126 | # Plot the predicted stock price 127 | if len(predicted_index) == predicted_price.shape[0]: 128 | plt.plot(predicted_index, predicted_price.flatten(), label='Predicted Price') 129 | else: 130 | print("Shape mismatch: Skipping plotting of predicted prices") 131 | 132 | # Add performance metrics and explanations to the plot (computed from this run rather than hardcoded) 133 | metrics_text = f'''Test Loss: {test_loss:.4f} (Lower is better, computed on scaled data) 134 | RMSE: {rmse:.4f} (Lower is better, in USD since errors are computed on inverse-transformed prices) 135 | MAE: {mae:.4f} (Lower is better, in USD)''' 136 | 137 | plt.text(0.02, 0.5, metrics_text, transform=plt.gca().transAxes, fontsize=12, verticalalignment='center', bbox={'facecolor': 'white', 'alpha': 0.7, 'pad': 10}) 138 | 139 | plt.legend() 140 | plt.title("Apple Stock Price Prediction using LSTM") 141 | plt.xlabel("Date") 142 | plt.ylabel("Stock Price") 143 | 144 | # Save the plot as a .png file 145 | plt.savefig('Apple_Stock_Price_Prediction.png') 146 | 147 | # Show the plot 148 | plt.show() 149 | -------------------------------------------------------------------------------- /3. Deep Learning Models/supervised_deep_learning_models/2. 
convolutional_neural_networks_(CNNs).py: -------------------------------------------------------------------------------- 1 | # CNNs are often supervised models (labeled data used for training), primarily used for image classification 2 | 3 | from sklearn.model_selection import train_test_split 4 | from tensorflow.keras.models import Sequential 5 | from tensorflow.keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | 9 | # Simulated data (PLEASE replace with real scraped data for any serious application) 10 | headlines = ["Stocks are up today", "Markets crash due to economic instability", "Neutral day in the market"] * 10 # Replicating for more data 11 | labels = [1, 0, 2] * 10 # 1: positive, 0: negative, 2: neutral 12 | 13 | # Text Preprocessing 14 | from tensorflow.keras.preprocessing.text import Tokenizer 15 | from tensorflow.keras.preprocessing.sequence import pad_sequences 16 | 17 | tokenizer = Tokenizer() 18 | tokenizer.fit_on_texts(headlines) 19 | vocab_size = len(tokenizer.word_index) + 1 20 | max_length = max([len(s.split()) for s in headlines]) 21 | 22 | sequences = tokenizer.texts_to_sequences(headlines) 23 | padded_sequences = pad_sequences(sequences, maxlen=max_length, padding='post') 24 | 25 | # Data Splitting 26 | X_train, X_test, y_train, y_test = train_test_split(padded_sequences, np.array(labels), test_size=0.2, random_state=42) 27 | 28 | # Model Building 29 | model = Sequential() 30 | model.add(Embedding(vocab_size, 16, input_length=max_length)) 31 | model.add(Conv1D(16, 3, activation='relu')) 32 | model.add(GlobalMaxPooling1D()) 33 | model.add(Dense(3, activation='softmax')) 34 | 35 | model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy']) 36 | history = model.fit(X_train, y_train, epochs=10, batch_size=2, validation_split=0.2) 37 | 38 | # Testing 39 | loss, accuracy = model.evaluate(X_test, y_test) 40 | print(f"Test Loss: {loss}") 41 | print(f"Test Accuracy: {accuracy}") 42 | 43 | # Visualization 44 | plt.figure(figsize=(16, 8)) 45 | 46 | plt.subplot(1, 2, 1) 47 | plt.plot(history.history['accuracy'], label='Train Accuracy') 48 | plt.plot(history.history['val_accuracy'], label='Validation Accuracy') 49 | plt.title('Model Accuracy') 50 | plt.xlabel('Epochs') 51 | plt.ylabel('Accuracy') 52 | plt.legend() 53 | 54 | plt.subplot(1, 2, 2) 55 | plt.plot(history.history['loss'], label='Train Loss') 56 | plt.plot(history.history['val_loss'], label='Validation Loss') 57 | plt.title('Model Loss') 58 | plt.xlabel('Epochs') 59 | plt.ylabel('Loss') 60 | plt.legend() 61 | 62 | # Add performance metrics and explanations to the plot 63 | metrics_text = f'''Test Loss: {loss:.4f} (Lower is better) 64 | Test Accuracy: {accuracy:.4f} (Higher is better)''' 65 | 66 | plt.gcf().text(0.02, 0.5, metrics_text, fontsize=12, verticalalignment='center', bbox={'facecolor': 'white', 'alpha': 0.7, 'pad': 10}) 67 | 68 | # Add model explanation to the plot 69 | model_explanation = '''This CNN model analyzes financial news headlines to categorize the sentiment as Positive, Negative, or Neutral. 70 | The model is trained on tokenized text data, and uses Conv1D layers to identify local patterns within the text. 
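With a kernel size of 3, each Conv1D filter spans three consecutive word embeddings, so the filters act as learned trigram detectors, and GlobalMaxPooling1D keeps only the strongest activation of each filter across the headline.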
71 | After training, the model is evaluated on a separate test set to assess its predictive accuracy.''' 72 | 73 | plt.gcf().text(0.6, 0.2, model_explanation, fontsize=6, verticalalignment='center', bbox={'facecolor': 'white', 'alpha': 0.7, 'pad': 10}) 74 | 75 | plt.suptitle("Financial News Sentiment Analysis using CNN") 76 | plt.tight_layout(rect=[0, 0.03, 1, 0.95]) 77 | 78 | # Save the plot as a .png file 79 | plt.savefig('Financial_News_Sentiment_Analysis.png') 80 | 81 | plt.show() 82 | -------------------------------------------------------------------------------- /3. Deep Learning Models/unsupervised_deep_learning_models/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manujajay/machinelearning4finance/7a315e218c211633e28f6ed3de3bdde1de5e09d3/3. Deep Learning Models/unsupervised_deep_learning_models/.DS_Store -------------------------------------------------------------------------------- /3. Deep Learning Models/unsupervised_deep_learning_models/3. autoencoders.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from sklearn.datasets import make_classification 4 | from sklearn.preprocessing import StandardScaler 5 | from sklearn.model_selection import train_test_split 6 | from tensorflow.keras.layers import Input, Dense 7 | from tensorflow.keras.models import Model 8 | 9 | # Generate synthetic financial data (replace with real financial data) 10 | X, _ = make_classification(n_samples=1000, n_features=20) 11 | X = StandardScaler().fit_transform(X) 12 | X_train, X_test = train_test_split(X, test_size=0.2, random_state=42) 13 | 14 | # Create autoencoder model 15 | input_layer = Input(shape=(20,)) 16 | encoded = Dense(14, activation='relu')(input_layer) 17 | decoded = Dense(20, activation='sigmoid')(encoded) 18 | 19 | autoencoder = Model(inputs=input_layer, outputs=decoded) 20 | autoencoder.compile(optimizer='adam', loss='mean_squared_error') 21 | 22 | # Train the model 23 | history = autoencoder.fit(X_train, X_train, epochs=50, batch_size=32, validation_split=0.2, verbose=1) 24 | 25 | # Use the trained autoencoder to predict test data 26 | X_test_predictions = autoencoder.predict(X_test) 27 | mse = np.mean(np.power(X_test - X_test_predictions, 2), axis=1) 28 | 29 | # Set a threshold for anomaly detection 30 | threshold = np.quantile(mse, 0.95) 31 | 32 | # Visualize results 33 | fig, axes = plt.subplots(1, 2, figsize=(24, 12)) 34 | 35 | # Plot histogram 36 | axes[0].hist(mse, bins=50, alpha=0.6, color='g', label='Normal') 37 | axes[0].axvline(x=threshold, color='r', linestyle='dashed', linewidth=2, label=f'Anomaly threshold ({threshold:.4f})') 38 | axes[0].set_title("Anomaly Detection using Autoencoder in Finance") 39 | axes[0].set_xlabel("Mean Squared Error (MSE)") 40 | axes[0].set_ylabel("Frequency") 41 | axes[0].legend() 42 | 43 | # Plot MSE over samples 44 | axes[1].plot(mse, label='MSE') 45 | axes[1].axhline(y=threshold, color='r', linestyle='dashed', linewidth=2, label=f'Anomaly threshold ({threshold:.4f})') 46 | axes[1].scatter(np.where(mse > threshold), mse[mse > threshold], color='r', zorder=5, label='Anomalies') 47 | axes[1].set_title("MSE Values Over Test Samples") 48 | axes[1].set_xlabel("Test Sample Index") 49 | axes[1].set_ylabel("Mean Squared Error (MSE)") 50 | axes[1].legend() 51 | 52 | # Add key statistics 53 | stats_text = f'''Key Statistics: 54 | - Training Data Size: {X_train.shape[0]} 55 | - Test 
Data Size: {X_test.shape[0]} 56 | - Anomaly Threshold (95 percentile): {threshold:.4f}''' 57 | 58 | fig.text(0.15, 0.1, stats_text, fontsize=12, bbox={'facecolor': 'white', 'alpha': 0.8, 'pad': 10}) 59 | 60 | # Add model explanation 61 | model_explanation = '''Model Explanation: 62 | Autoencoders can be valuable in detecting anomalies in trading and identifying fraudulent transactions. 63 | By training the autoencoder on 'normal' financial data, it learns to reconstruct similar data efficiently. 64 | Anomalies (unusual patterns) result in higher reconstruction errors (MSE), making them identifiable.''' 65 | 66 | fig.text(0.6, 0.1, model_explanation, fontsize=12, bbox={'facecolor': 'white', 'alpha': 0.8, 'pad': 10}) 67 | 68 | # Save the plot as a PNG file 69 | plt.savefig('Anomaly_Detection_Using_Autoencoder.png') 70 | 71 | plt.show() 72 | -------------------------------------------------------------------------------- /3. Deep Learning Models/unsupervised_deep_learning_models/4. generative_adversarial_networks_(GANs).py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from tensorflow.keras.models import Model 4 | from tensorflow.keras.layers import Input, Dense 5 | from tensorflow.keras.optimizers import Adam 6 | 7 | # Generate some synthetic "real" financial market return data (replace with real data) 8 | np.random.seed(0) 9 | real_data = np.random.normal(0, 1, (1000, 1)) 10 | 11 | # Generator Model 12 | input_noise = Input(shape=(10,)) 13 | hidden_layer_g = Dense(30, activation='relu')(input_noise) 14 | generated_data = Dense(1, activation='linear')(hidden_layer_g) 15 | generator = Model(inputs=input_noise, outputs=generated_data) 16 | 17 | # Discriminator Model 18 | input_real_data = Input(shape=(1,)) 19 | hidden_layer_d = Dense(30, activation='relu')(input_real_data) 20 | validity = Dense(1, activation='sigmoid')(hidden_layer_d) 21 | discriminator = Model(inputs=input_real_data, outputs=validity) 22 | discriminator.compile(loss='binary_crossentropy', optimizer=Adam(), metrics=['accuracy']) 23 | 24 | # GAN Model 25 | discriminator.trainable = False 26 | gan_output = discriminator(generator(input_noise)) 27 | gan = Model(inputs=input_noise, outputs=gan_output) 28 | gan.compile(loss='binary_crossentropy', optimizer=Adam()) 29 | 30 | # Training parameters 31 | epochs = 1000 32 | batch_size = 32 33 | 34 | # Train GAN 35 | for epoch in range(epochs): 36 | # Train Discriminator 37 | noise = np.random.normal(0, 1, (batch_size, 10)) 38 | generated_data = generator.predict(noise) 39 | real_data_batch = real_data[np.random.randint(0, real_data.shape[0], batch_size)] 40 | labels_real = np.ones((batch_size, 1)) 41 | labels_fake = np.zeros((batch_size, 1)) 42 | d_loss_real = discriminator.train_on_batch(real_data_batch, labels_real) 43 | d_loss_fake = discriminator.train_on_batch(generated_data, labels_fake) 44 | d_loss = 0.5 * np.add(d_loss_real, d_loss_fake) 45 | 46 | # Train Generator 47 | noise = np.random.normal(0, 1, (batch_size, 10)) 48 | labels_gan = np.ones((batch_size, 1)) 49 | g_loss = gan.train_on_batch(noise, labels_gan) 50 | 51 | # Generate data to visualize 52 | noise = np.random.normal(0, 1, (1000, 10)) 53 | generated_data = generator.predict(noise) 54 | 55 | # Create the plot 56 | fig, ax = plt.subplots(figsize=(12, 6)) 57 | 58 | ax.hist(real_data, alpha=0.5, label='Real Data') 59 | ax.hist(generated_data, alpha=0.5, label='Generated Data') 60 | ax.set_title('GAN for Simulating Financial 
Market Conditions') 61 | ax.set_xlabel('Market Returns') 62 | ax.set_ylabel('Frequency') 63 | ax.legend() 64 | 65 | # Model Description and Key Stats 66 | description = '''Model Description: 67 | The GAN model consists of a Generator and a Discriminator. 68 | The Generator tries to produce synthetic financial data, while the Discriminator tries to distinguish between real and synthetic data. 69 | After training, we use the Generator to simulate different market conditions for assessing potential risks associated with various investment strategies.''' 70 | 71 | stats = f'''Key Stats: 72 | - Number of epochs: {epochs} 73 | - Batch size: {batch_size} 74 | - Discriminator Loss: {d_loss[0]:.4f} 75 | - Generator Loss: {g_loss:.4f}''' 76 | 77 | fig.text(0.2, 0.55, description, fontsize=6, bbox={'facecolor': 'white', 'alpha': 0.8, 'pad': 10}) 78 | fig.text(0.65, 0.25, stats, fontsize=12, bbox={'facecolor': 'white', 'alpha': 0.8, 'pad': 10}) 79 | 80 | # Save plot 81 | plt.savefig('GAN_Financial_Simulation.png') 82 | plt.show() 83 | -------------------------------------------------------------------------------- /4. Reinforcement Learning Models/Q_Learning_Stock_Trading_YFinance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manujajay/machinelearning4finance/7a315e218c211633e28f6ed3de3bdde1de5e09d3/4. Reinforcement Learning Models/Q_Learning_Stock_Trading_YFinance.png -------------------------------------------------------------------------------- /4. Reinforcement Learning Models/README.md: -------------------------------------------------------------------------------- 1 | ![alt text](Q_Learning_Stock_Trading_YFinance.png) 2 | 3 | 4 | -------------------------------------------------------------------------------- /4. 
Reinforcement Learning Models/q_learning.py: -------------------------------------------------------------------------------- 1 | import yfinance as yf 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | # Download stock data from Yahoo Finance 6 | data = yf.download("AAPL", start="2021-01-01", end="2021-02-01")["Close"] 7 | stock_prices = data.values 8 | dates = data.index 9 | 10 | # Initialize Q-Learning parameters 11 | INITIAL_BALANCE = 1000.0 12 | N_TRADING_DAYS = len(stock_prices) 13 | 14 | # Initialize Q-Table 15 | q_table = np.zeros(3) # Buy, Sell, Hold 16 | 17 | # Initialize records for rewards and balances 18 | balances = [INITIAL_BALANCE] 19 | 20 | # Initialize records for actions 21 | actions = [] 22 | 23 | # Hyperparameters 24 | epsilon = 0.2 # Exploration vs Exploitation 25 | lr = 0.1 # Learning rate 26 | gamma = 0.99 # Discount factor 27 | 28 | # Simulation 29 | for day in range(N_TRADING_DAYS - 1): 30 | state = balances[-1] 31 | stock_price = stock_prices[day] 32 | next_stock_price = stock_prices[day + 1] 33 | 34 | # Epsilon-greedy action selection 35 | action = np.random.randint(3) if np.random.rand() < epsilon else np.argmax(q_table) 36 | 37 | # Reward function 38 | reward = 0 39 | if action == 0: # Buy 40 | reward = next_stock_price - stock_price 41 | elif action == 1: # Sell 42 | reward = stock_price - next_stock_price 43 | 44 | # Q-Learning Update 45 | next_state = state + reward 46 | q_table[action] = q_table[action] + lr * (reward + gamma * np.max(q_table) - q_table[action]) 47 | 48 | # Record action and balance 49 | actions.append(action) 50 | balances.append(next_state) 51 | 52 | # Generate Plot 53 | fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 8)) 54 | 55 | # Stock prices with Buy/Sell markers 56 | buy_dates = [dates[i] for i in range(len(actions)) if actions[i] == 0] 57 | sell_dates = [dates[i] for i in range(len(actions)) if actions[i] == 1] 58 | 59 | ax1.plot(dates, stock_prices, label='Stock Price') 60 | ax1.scatter(buy_dates, [stock_prices[i] for i in range(len(actions)) if actions[i] == 0], marker='^', color='g', label='Buy', zorder=5) 61 | ax1.scatter(sell_dates, [stock_prices[i] for i in range(len(actions)) if actions[i] == 1], marker='v', color='r', label='Sell', zorder=5) 62 | 63 | ax1.set_title("Backtest with Buy/Sell Indicators") 64 | ax1.set_xlabel("Date") 65 | ax1.set_ylabel("Stock Price") 66 | ax1.legend() 67 | 68 | # Balances (balances already holds one entry per trading day, starting at INITIAL_BALANCE, so it aligns with dates as-is) 69 | ax2.plot(dates, balances) 70 | ax2.set_title("Balance Over Time") 71 | ax2.set_xlabel("Date") 72 | ax2.set_ylabel("Balance") 73 | 74 | # Description and Statistics 75 | statistics = f"""Parameters: 76 | - Number of trading days: {N_TRADING_DAYS} 77 | - Learning rate: {lr} 78 | - Discount factor: {gamma} 79 | 80 | Statistics: 81 | - Final Balance: {balances[-1]:.2f} 82 | """ 83 | 84 | model_description = """This Q-learning model simulates stock trading decisions. 85 | It decides whether to buy, sell, or hold based on the history of stock prices. 86 | Green markers (^) indicate buying points, and red markers (v) indicate selling points.""" 87 | 88 | fig.text(0.2, 0.70, statistics, fontsize=7) 89 | fig.text(0.65, 0.70, model_description, fontsize=6) 90 | 91 | plt.suptitle("Stock Trading Simulation using Q-Learning with Yahoo Finance Data") 92 | plt.tight_layout(rect=[0, 0.03, 1, 0.95]) 93 | 94 | # Save plot 95 | plt.savefig("Q_Learning_Stock_Trading_YFinance.png") 96 | 97 | plt.show() 98 | -------------------------------------------------------------------------------- /5. 
ML Applications In Finance/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manujajay/machinelearning4finance/7a315e218c211633e28f6ed3de3bdde1de5e09d3/5. ML Applications In Finance/.DS_Store -------------------------------------------------------------------------------- /5. ML Applications In Finance/01. Risk Management ✅/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manujajay/machinelearning4finance/7a315e218c211633e28f6ed3de3bdde1de5e09d3/5. ML Applications In Finance/01. Risk Management ✅/.DS_Store -------------------------------------------------------------------------------- /5. ML Applications In Finance/01. Risk Management ✅/1. credit_scoring✅.py: -------------------------------------------------------------------------------- 1 | # Use of supervised algorithms to predict the likelihood of a borrower defaulting on a loan. 2 | 3 | ''' 4 | Python script for credit scoring using machine learning. We'll use scikit-learn to create a model that predicts the creditworthiness 5 | of an individual based on some features like income, age, and loan amount. 6 | ''' 7 | 8 | # Import libraries 9 | import pandas as pd 10 | import numpy as np 11 | import matplotlib.pyplot as plt 12 | from sklearn.model_selection import train_test_split 13 | from sklearn.ensemble import RandomForestClassifier 14 | from sklearn.metrics import accuracy_score, confusion_matrix 15 | 16 | # Sample data: [income, age, loan_amount] 17 | # The target variable is 'creditworthy', where 1 means creditworthy and 0 means not creditworthy 18 | data = { 19 | 'income': [50000, 75000, 30000, 100000, 65000, 42000, 120000, 110000, 95000, 67000], 20 | 'age': [25, 45, 35, 50, 23, 33, 55, 40, 48, 20], 21 | 'loan_amount': [25000, 50000, 15000, 100000, 45000, 27000, 80000, 38000, 62000, 20000], 22 | 'creditworthy': [1, 1, 0, 1, 0, 0, 1, 1, 1, 0] 23 | } 24 | 25 | # Convert the dictionary into a DataFrame 26 | df = pd.DataFrame(data) 27 | 28 | # Separate the features (X) from the target variable (y) 29 | X = df[['income', 'age', 'loan_amount']] 30 | y = df['creditworthy'] 31 | 32 | # Plotting data points 33 | plt.figure(figsize=(12, 6)) 34 | 35 | plt.subplot(1, 3, 1) 36 | plt.scatter(df['income'], df['creditworthy'], c=df['creditworthy']) 37 | plt.xlabel('Income') 38 | plt.ylabel('Creditworthy') 39 | plt.title('Income vs Creditworthiness') 40 | 41 | plt.subplot(1, 3, 2) 42 | plt.scatter(df['age'], df['creditworthy'], c=df['creditworthy']) 43 | plt.xlabel('Age') 44 | plt.ylabel('Creditworthy') 45 | plt.title('Age vs Creditworthiness') 46 | 47 | plt.subplot(1, 3, 3) 48 | plt.scatter(df['loan_amount'], df['creditworthy'], c=df['creditworthy']) 49 | plt.xlabel('Loan Amount') 50 | plt.ylabel('Creditworthy') 51 | plt.title('Loan Amount vs Creditworthiness') 52 | 53 | plt.tight_layout() 54 | plt.show() 55 | 56 | # Split the data into training and test sets 57 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) 58 | 59 | # Initialize the Random Forest Classifier 60 | clf = RandomForestClassifier() 61 | 62 | # Train the model 63 | clf.fit(X_train, y_train) 64 | 65 | # Make predictions on the test set 66 | y_pred = clf.predict(X_test) 67 | 68 | # Evaluate the model 69 | accuracy = accuracy_score(y_test, y_pred) 70 | conf_matrix = confusion_matrix(y_test, y_pred) 71 | 72 | print(f'Accuracy: {accuracy}') 73 | print(f'Confusion Matrix: \n{conf_matrix}') 74 | 
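75 | # With only ten samples, a single train/test split is very noisy; k-fold
76 | # cross-validation gives a more stable accuracy estimate. A quick, illustrative
77 | # check on the same toy data (cv=3, because the smaller class has only four members):
78 | from sklearn.model_selection import cross_val_score
79 | cv_scores = cross_val_score(RandomForestClassifier(random_state=42), X, y, cv=3)
80 | print(f'3-fold CV accuracy: {cv_scores.mean():.2f} (+/- {cv_scores.std():.2f})')
81 | 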
82 | # Function to predict if an individual is creditworthy
83 | def predict_creditworthiness(income, age, loan_amount):
84 |     # Wrap the input in a DataFrame with the training column names, so scikit-learn
85 |     # does not warn about missing feature names
86 |     applicant = pd.DataFrame([[income, age, loan_amount]], columns=X.columns)
87 |     prediction = clf.predict(applicant)[0]
88 | 
89 |     if prediction == 1:
90 |         return "The individual is creditworthy."
91 |     else:
92 |         return "The individual is not creditworthy."
93 | 
94 | # Example usage of the prediction function
95 | print(predict_creditworthiness(70000, 30, 40000))  # Should generally return "The individual is creditworthy."
96 | print(predict_creditworthiness(30000, 25, 60000))  # Should generally return "The individual is not creditworthy."
97 | 
98 | '''
99 | Here's what each part of the code does:
100 | 
101 | Import Libraries: The necessary Python libraries for data manipulation and machine learning are imported.
102 | 
103 | Sample Data: We create a DataFrame from a dictionary, where each entry corresponds to an individual's attributes like income, age, and loan amount. The target variable is creditworthy, where 1 means the individual is creditworthy and 0 means they are not.
104 | 
105 | Data Splitting: We divide the data into a training set and a test set using the train_test_split() function from scikit-learn.
106 | 
107 | Model Initialization and Training: We use a Random Forest Classifier to train our model on the training set.
108 | 
109 | Evaluation: We evaluate the model using the test set and print out the accuracy and confusion matrix.
110 | 
111 | Prediction Function: We define a function predict_creditworthiness() that uses our trained model to predict whether an individual is creditworthy based on their income, age, and loan amount.
112 | 
113 | Example Usage: We call the predict_creditworthiness() function with sample data to demonstrate how to use it.
114 | 
115 | Remember that this is just a basic example. In a real-world application, you'd need a much larger dataset and you'd also spend time tuning the model and perhaps using more sophisticated methods for evaluation.
116 | ''' -------------------------------------------------------------------------------- /5. ML Applications In Finance/01. Risk Management ✅/2. value-at-risk_modeling✅.py: --------------------------------------------------------------------------------
1 | # Estimation of the potential losses an investment portfolio could face over a specified period for a given confidence interval.
2 | 
3 | '''
4 | One common approach is to use a machine learning model to predict future returns, and then calculate VaR based on these predictions.
5 | Below is an example using a Random Forest model to predict future stock returns and subsequently calculate VaR. 
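6 | 
7 | Throughout, VaR is read off as a lower quantile of the predicted-return distribution:
8 | VaR = quantile(predicted_returns, tail_prob), with tail_prob = 0.05,
9 | i.e. the model's estimate is that on roughly 95% of days the daily return should not fall below this level.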
10 | '''
11 | 
12 | # Import libraries
13 | import numpy as np
14 | import pandas as pd
15 | import matplotlib.pyplot as plt
16 | from sklearn.ensemble import RandomForestRegressor
17 | from sklearn.model_selection import train_test_split
18 | from sklearn.metrics import mean_squared_error
19 | 
20 | # Generate synthetic stock returns data
21 | # In a real-world application, you would fetch this data from a reliable source
22 | np.random.seed(42)
23 | n_data_points = 1000
24 | stock_returns = np.random.normal(0, 1, n_data_points)
25 | 
26 | # Create a DataFrame
27 | df = pd.DataFrame(stock_returns, columns=['Returns'])
28 | 
29 | # Feature engineering: use lagged returns as features
30 | for i in range(1, 6):
31 |     df[f'Lag_{i}'] = df['Returns'].shift(i)
32 | 
33 | # Remove NaN
34 | df = df.dropna()
35 | 
36 | # Split into features (X) and target (y)
37 | X = df.drop('Returns', axis=1)
38 | y = df['Returns']
39 | 
40 | # Split data into training and test sets
41 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
42 | 
43 | # Initialize and train the Random Forest model
44 | model = RandomForestRegressor(n_estimators=100, random_state=42)
45 | model.fit(X_train, y_train)
46 | 
47 | # Make predictions
48 | y_pred = model.predict(X_test)
49 | 
50 | # Evaluate the model
51 | mse = mean_squared_error(y_test, y_pred)
52 | print(f'Mean Squared Error: {mse}')
53 | 
54 | # Calculate VaR
55 | tail_prob = 0.05  # tail probability; the VaR confidence level is 1 - tail_prob = 95%
56 | VaR = np.quantile(y_pred, tail_prob)
57 | 
58 | print(f'Value-at-Risk (VaR) at {(1 - tail_prob) * 100:.0f}% confidence is {VaR:.4f}')
59 | 
60 | # Plot predicted returns and VaR
61 | plt.figure(figsize=(10, 6))
62 | plt.hist(y_pred, bins=30, alpha=0.75, color='blue', label='Predicted Returns')
63 | plt.axvline(x=VaR, color='r', linestyle='--', label=f'VaR at {(1 - tail_prob) * 100:.0f}% confidence')
64 | plt.xlabel('Predicted Return')
65 | plt.ylabel('Frequency')
66 | plt.title('Value-at-Risk (VaR) using Machine Learning')
67 | plt.legend()
68 | plt.show()
69 | 
70 | 
71 | '''
72 | Explanation:
73 | 
74 | Generate Synthetic Stock Returns: The code generates synthetic stock return data for demonstration purposes.
75 | 
76 | Feature Engineering: Lagged returns are used as features for the machine learning model.
77 | 
78 | Train-Test Split: The data is split into training and test sets.
79 | 
80 | Random Forest Model: A Random Forest Regressor model is trained on the data.
81 | 
82 | Prediction: The model predicts future returns on the test set.
83 | 
84 | Evaluation: The model is evaluated using Mean Squared Error (MSE).
85 | 
86 | Calculate VaR: VaR is taken as the 5% quantile of the predicted returns, computed with numpy's quantile function.
87 | 
88 | Plot: The predicted returns and VaR are plotted.
89 | 
90 | The red line in the plot marks the VaR at a 95% confidence level: according to this model, on 95% of days the daily return should not fall below this value.
91 | 
92 | Note: This is a very simplified example for demonstration purposes. In a real-world scenario, the data would be more complex, and additional steps such as data normalization, hyperparameter tuning, and validation would be necessary. Also note that the quantiles of model predictions are typically narrower than the quantiles of realized returns, so this approach tends to understate risk.
93 | ''' -------------------------------------------------------------------------------- /5. ML Applications In Finance/01. Risk Management ✅/3. fraud_detection✅.py: --------------------------------------------------------------------------------
1 | # Anomaly detection to identify unusual patterns which could suggest fraudulent transactions. 
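# Note: despite the header comment above, the demo below is supervised classification
# on labeled data. When fraud labels are not available, an unsupervised anomaly
# detector is the usual choice; a minimal sketch (illustrative, not part of the
# original script), assuming a feature matrix X of transaction features:
#
#     from sklearn.ensemble import IsolationForest
#     iso = IsolationForest(contamination=0.05, random_state=42)
#     flags = iso.fit_predict(X)  # -1 = anomaly (possible fraud), 1 = normal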
2 | 3 | ''' 4 | a Python script that demonstrates a simple approach to fraud detection using machine learning. 5 | In this example, we'll use the RandomForestClassifier from scikit-learn to classify transactions as either "fraudulent" or "genuine". 6 | ''' 7 | 8 | # Import Libraries 9 | import numpy as np 10 | import pandas as pd 11 | import matplotlib.pyplot as plt 12 | from sklearn.model_selection import train_test_split 13 | from sklearn.ensemble import RandomForestClassifier 14 | from sklearn.metrics import accuracy_score, confusion_matrix, classification_report 15 | from sklearn.preprocessing import StandardScaler 16 | 17 | # Generate synthetic data for demonstration 18 | # In a real-world application, replace this with actual data 19 | np.random.seed(42) 20 | n_samples = 1000 21 | 22 | # Genuine transactions are centered around (0, 0) 23 | genuine = np.random.normal(0, 1, (int(n_samples * 0.95), 2)) 24 | genuine_labels = np.zeros(int(n_samples * 0.95)) 25 | 26 | # Fraudulent transactions are centered around (5, 5) 27 | fraud = np.random.normal(5, 1, (int(n_samples * 0.05), 2)) 28 | fraud_labels = np.ones(int(n_samples * 0.05)) 29 | 30 | # Combine into one dataset 31 | X = np.vstack([genuine, fraud]) 32 | y = np.hstack([genuine_labels, fraud_labels]) 33 | 34 | # Data Preprocessing: Feature Scaling 35 | scaler = StandardScaler() 36 | X = scaler.fit_transform(X) 37 | 38 | # Split data into training and test sets 39 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y) 40 | 41 | # Initialize and train RandomForest Classifier 42 | clf = RandomForestClassifier(random_state=42) 43 | clf.fit(X_train, y_train) 44 | 45 | # Predict on test set 46 | y_pred = clf.predict(X_test) 47 | 48 | # Evaluation Metrics 49 | print(f'Accuracy: {accuracy_score(y_test, y_pred)}') 50 | print(f'Confusion Matrix: \n{confusion_matrix(y_test, y_pred)}') 51 | print(f'Classification Report: \n{classification_report(y_test, y_pred)}') 52 | 53 | # Plotting 54 | plt.figure(figsize=(10, 6)) 55 | 56 | # Plot genuine transactions 57 | plt.scatter(X_test[y_test == 0][:, 0], X_test[y_test == 0][:, 1], label='Genuine', alpha=0.5) 58 | 59 | # Plot fraudulent transactions 60 | plt.scatter(X_test[y_test == 1][:, 0], X_test[y_test == 1][:, 1], label='Fraud', alpha=0.5) 61 | 62 | # Highlight false negatives 63 | plt.scatter(X_test[(y_test == 1) & (y_pred == 0)][:, 0], X_test[(y_test == 1) & (y_pred == 0)][:, 1], s=100, 64 | facecolors='none', edgecolors='r', label='False Negative') 65 | 66 | # Highlight false positives 67 | plt.scatter(X_test[(y_test == 0) & (y_pred == 1)][:, 0], X_test[(y_test == 0) & (y_pred == 1)][:, 1], s=100, 68 | facecolors='none', edgecolors='m', label='False Positive') 69 | 70 | plt.xlabel('Feature 1') 71 | plt.ylabel('Feature 2') 72 | plt.title('Fraud Detection') 73 | plt.legend() 74 | plt.show() 75 | 76 | ''' 77 | Explanation: 78 | 79 | Data Generation: We're creating synthetic data for both genuine and fraudulent transactions. In practice, you would replace this with your actual data. 80 | 81 | Feature Scaling: Using the StandardScaler from scikit-learn to normalize features, which is often necessary for machine learning algorithms. 82 | 83 | Train-Test Split: We're splitting the data into training and test sets, with 20% of the data reserved for testing. 84 | 85 | Random Forest Classifier: A simple Random Forest model is trained on the training data. 
86 | 87 | Prediction and Evaluation: We then use the trained model to make predictions on the test set, and print evaluation metrics like accuracy, confusion matrix, and classification report. 88 | 89 | Plotting: Finally, we plot the test data, highlighting genuine and fraudulent transactions. We also indicate false positives and false negatives. 90 | 91 | This is a simplified example meant for demonstration. Real-world fraud detection models would involve far more complexity, such as dealing with imbalanced data, feature engineering, hyperparameter tuning, and possibly using more advanced algorithms. 92 | ''' -------------------------------------------------------------------------------- /5. ML Applications In Finance/01. Risk Management ✅/4. operational_risk_modeling✅.py: -------------------------------------------------------------------------------- 1 | # Predictive models to identify potential operational hazards and risks. 2 | 3 | ''' 4 | a Python script that demonstrates how to use machine learning to predict operational hazards based on synthetic features. 5 | 6 | In this example, I'll use a RandomForestClassifier from scikit-learn to create a predictive model. 7 | The target variable is a binary outcome representing whether or not an operational hazard exists (1 for hazard, 0 for no hazard). 8 | ''' 9 | 10 | # Import Libraries 11 | import numpy as np 12 | import pandas as pd 13 | import matplotlib.pyplot as plt 14 | from sklearn.model_selection import train_test_split 15 | from sklearn.ensemble import RandomForestClassifier 16 | from sklearn.metrics import accuracy_score, confusion_matrix, classification_report 17 | from sklearn.preprocessing import StandardScaler 18 | 19 | # Generate synthetic data for demonstration 20 | # In a real-world application, you would use actual data 21 | np.random.seed(42) 22 | n_samples = 1000 23 | 24 | # Non-hazardous situations with features centered around (2, 2) 25 | non_hazard = np.random.normal(2, 1, (int(n_samples * 0.7), 2)) 26 | non_hazard_labels = np.zeros(int(n_samples * 0.7)) 27 | 28 | # Hazardous situations with features centered around (5, 5) 29 | hazard = np.random.normal(5, 1, (int(n_samples * 0.3), 2)) 30 | hazard_labels = np.ones(int(n_samples * 0.3)) 31 | 32 | # Combine into one dataset 33 | X = np.vstack([non_hazard, hazard]) 34 | y = np.hstack([non_hazard_labels, hazard_labels]) 35 | 36 | # Feature Scaling 37 | scaler = StandardScaler() 38 | X = scaler.fit_transform(X) 39 | 40 | # Split into training and test sets 41 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y) 42 | 43 | # Initialize and train the RandomForestClassifier 44 | clf = RandomForestClassifier(random_state=42) 45 | clf.fit(X_train, y_train) 46 | 47 | # Make predictions 48 | y_pred = clf.predict(X_test) 49 | 50 | # Evaluation Metrics 51 | print(f'Accuracy: {accuracy_score(y_test, y_pred)}') 52 | print(f'Confusion Matrix: \n{confusion_matrix(y_test, y_pred)}') 53 | print(f'Classification Report: \n{classification_report(y_test, y_pred)}') 54 | 55 | # Plotting the results 56 | plt.figure(figsize=(10, 6)) 57 | 58 | # Plot non-hazardous situations 59 | plt.scatter(X_test[y_test == 0][:, 0], X_test[y_test == 0][:, 1], label='Non-Hazard', alpha=0.6) 60 | 61 | # Plot hazardous situations 62 | plt.scatter(X_test[y_test == 1][:, 0], X_test[y_test == 1][:, 1], label='Hazard', alpha=0.6) 63 | 64 | # Highlight false negatives 65 | plt.scatter(X_test[(y_test == 1) & (y_pred == 0)][:, 0], X_test[(y_test == 1) & (y_pred == 
0)][:, 1], s=100,
66 |             facecolors='none', edgecolors='r', label='False Negative')
67 | 
68 | # Highlight false positives
69 | plt.scatter(X_test[(y_test == 0) & (y_pred == 1)][:, 0], X_test[(y_test == 0) & (y_pred == 1)][:, 1], s=100,
70 |             facecolors='none', edgecolors='m', label='False Positive')
71 | 
72 | plt.xlabel('Feature 1')
73 | plt.ylabel('Feature 2')
74 | plt.title('Operational Hazard Prediction')
75 | plt.legend()
76 | plt.show()
77 | 
78 | '''
79 | Explanation:
80 | 
81 | Data Generation: Synthetic data is generated to simulate operational hazards. In practice, you would use actual operational data with relevant features.
82 | 
83 | Feature Scaling: Features are scaled using StandardScaler from scikit-learn. This is often necessary for machine learning algorithms.
84 | 
85 | Train-Test Split: The dataset is split into a training set and a test set.
86 | 
87 | Random Forest Classifier: A RandomForestClassifier is trained on the training set.
88 | 
89 | Prediction and Evaluation: The model is used to make predictions on the test set, and several evaluation metrics are printed.
90 | 
91 | Plotting: A scatter plot of the test set shows hazardous and non-hazardous points, with false positives and false negatives highlighted. This provides a visual insight into how well the model is performing.
92 | 
93 | This is a simplified example intended for demonstration purposes. A real-world application would include more steps like feature engineering, dealing with imbalanced data, hyperparameter tuning, and perhaps the use of more advanced machine learning algorithms.
94 | ''' -------------------------------------------------------------------------------- /5. ML Applications In Finance/02. Asset Management ✅/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manujajay/machinelearning4finance/7a315e218c211633e28f6ed3de3bdde1de5e09d3/5. ML Applications In Finance/02. Asset Management ✅/.DS_Store -------------------------------------------------------------------------------- /5. ML Applications In Finance/02. Asset Management ✅/1. portfolio_optimization✅.py: --------------------------------------------------------------------------------
1 | # Models to maximize returns for a given level of risk.
2 | 
3 | '''
4 | Portfolio optimization involves selecting the mix of investment assets that is statistically likely to achieve a desired
5 | return for a given level of risk; the classic method is Mean-Variance Optimization. The Streamlit app below estimates
6 | mean returns and covariances from historical yfinance data, then solves for the long-only weights w (summing to 1) that
7 | maximize the annualized Sharpe ratio S(w) = 252 * (w . mu) / sqrt(252 * w' Sigma w), taking the risk-free rate as zero. 
8 | ''' 9 | 10 | import streamlit as st 11 | import yfinance as yf 12 | import numpy as np 13 | import pandas as pd 14 | import matplotlib.pyplot as plt 15 | from scipy.optimize import minimize 16 | from pandas_datareader import data as pdr 17 | import datetime 18 | import concurrent.futures 19 | 20 | # Fetch stock names using multi-threading 21 | def get_stock_name(ticker): 22 | stock = yf.Ticker(ticker) 23 | info = stock.info 24 | return info.get('shortName') or info.get('longName') or ticker 25 | 26 | def fetch_all_stock_names(tickers): 27 | ticker_to_name = {} 28 | with concurrent.futures.ThreadPoolExecutor() as executor: 29 | future_to_ticker = {executor.submit(get_stock_name, ticker): ticker for ticker in tickers} 30 | for future in concurrent.futures.as_completed(future_to_ticker): 31 | ticker = future_to_ticker[future] 32 | try: 33 | ticker_to_name[ticker] = future.result() 34 | except Exception as e: 35 | print(f"Could not fetch name for {ticker}: {e}") 36 | ticker_to_name[ticker] = ticker 37 | return ticker_to_name 38 | 39 | # Streamlit title and setup 40 | st.title("Portfolio Optimization App") 41 | 42 | # Timeframe 43 | timeframe = st.selectbox('Select Timeframe:', ['1Y', '2Y', '3Y']) 44 | start_date = str((datetime.datetime.now() - pd.DateOffset(years=int(timeframe[0]))).date()) 45 | end_date = str(datetime.datetime.now().date()) 46 | 47 | # Categorized Tickers 48 | all_tickers = { 49 | 'Stocks': ["AAPL", "MSFT", "GOOGL", "AMZN", "TSLA"], 50 | 'Stock ETFs': ["SPY", "QQQ", "EFA", "IWM", "EEM"], 51 | 'Commodities': ["GLD", "SLV", "USO"], 52 | 'Bond ETFs': ["TLT"], 53 | 'Real Estate ETFs': ["VNQ"], 54 | 'Highlighted ETFs': ["0P0000XMRD.L", "0P0000KSPA.L", "0P000023MW.L", "0P000185T1.L", "0P0000TKZG.L"], 55 | 'Highlighted Stocks': ["NTDOY", "PLTK", "INSE", "SCPL", "EA"] 56 | } 57 | 58 | all_tickers_flat = [item for sublist in all_tickers.values() for item in sublist] 59 | ticker_to_name = fetch_all_stock_names(all_tickers_flat) 60 | 61 | # Selection 62 | selected_tickers = [] 63 | for category, tickers in all_tickers.items(): 64 | st.write(f"## {category}") 65 | selected = st.multiselect('', tickers, format_func=lambda x: f"{ticker_to_name[x]} ({x})") 66 | selected_tickers.extend(selected) 67 | 68 | # Make sure SPY is in the selected_tickers for benchmarking 69 | if 'SPY' not in selected_tickers: 70 | selected_tickers.append('SPY') 71 | 72 | # Download stock data 73 | def download_data(ticker_list, start_date, end_date): 74 | data = yf.download(ticker_list, start=start_date, end=end_date)['Adj Close'] 75 | return data.pct_change().dropna() 76 | 77 | # Portfolio optimization 78 | def optimize_portfolio(returns): 79 | def objective(weights): 80 | portfolio_return = np.sum(returns.mean() * weights) * 252 81 | portfolio_volatility = np.sqrt(np.dot(weights.T, np.dot(returns.cov() * 252, weights))) 82 | return -portfolio_return / portfolio_volatility 83 | 84 | initial_weights = [1. 
/ len(returns.columns)] * len(returns.columns)
85 |     bounds = tuple((0, 1) for asset in range(len(returns.columns)))
86 |     constraints = [{'type': 'eq', 'fun': lambda weights: np.sum(weights) - 1}]
87 | 
88 |     solution = minimize(objective, initial_weights, bounds=bounds, constraints=constraints)
89 |     return solution.x
90 | 
91 | # Download data
92 | data = download_data(selected_tickers, start_date, end_date)
93 | 
94 | # Optimize portfolio
95 | optimal_weights = optimize_portfolio(data)
96 | 
97 | # Display Results
98 | st.write("## Final Portfolio Allocation:")
99 | allocation_df = pd.DataFrame({
100 |     'Asset': [ticker_to_name[ticker] for ticker in selected_tickers],
101 |     'Ticker': selected_tickers,
102 |     'Weights': optimal_weights
103 | })
104 | st.table(allocation_df)
105 | 
106 | # Pie Chart (Only include assets with at least 1% weight)
107 | significant_weights = optimal_weights[optimal_weights >= 0.01]
108 | significant_tickers = np.array(selected_tickers)[optimal_weights >= 0.01]
109 | 
110 | st.write("## Portfolio Allocation Chart:")
111 | fig, ax = plt.subplots()
112 | ax.pie(significant_weights, labels=[ticker_to_name[ticker] for ticker in significant_tickers], autopct='%1.1f%%')
113 | ax.axis('equal')
114 | st.pyplot(fig)
115 | 
116 | # Performance vs. SPY
117 | cumulative_portfolio_return = (data * optimal_weights).sum(axis=1).add(1).cumprod().sub(1)
118 | cumulative_spy_return = data['SPY'].add(1).cumprod().sub(1)
119 | 
120 | fig, ax = plt.subplots()
121 | cumulative_portfolio_return.plot(ax=ax, label='Portfolio')
122 | cumulative_spy_return.plot(ax=ax, label='SPY')
123 | plt.legend()
124 | plt.title("Portfolio Performance vs. SPY")
125 | st.pyplot(fig)
126 | 
127 | # Calculate Sharpe Ratio
128 | risk_free_data = pdr.get_data_fred('GS3M', start_date, end_date)
129 | risk_free_data_monthly = risk_free_data.resample('M').mean()
130 | risk_free_data_monthly.interpolate(method='linear', inplace=True)
131 | risk_free_data_aligned = risk_free_data_monthly.reindex(data.index, method='pad') / 100 / 252
132 | 
133 | portfolio_return = (data * optimal_weights).sum(axis=1)
134 | excess_portfolio_return = portfolio_return.sub(risk_free_data_aligned['GS3M'].squeeze(), axis=0)
135 | 
136 | sharpe_ratio = np.sqrt(252) * (excess_portfolio_return.mean() / excess_portfolio_return.std())
137 | st.write(f"## Annualized Sharpe Ratio: {sharpe_ratio:.4f}")
138 | 
139 | 
140 | # Note: This is a simplified example. Always consult with a financial advisor before making any investment decisions.
141 | 
142 | # To run, do:
143 | # "streamlit run 1.\ portfolio_optimization.py"
144 | 
145 | 
146 | 
147 | '''
148 | Explanation:
149 | 
150 | Data: Historical adjusted closes are downloaded from yfinance for the selected tickers and converted into daily returns.
151 | (UPDATE: the original synthetic-data / Random Forest version was replaced with live yfinance data.)
152 | 
153 | User Input: The user selects a timeframe (1-3 years) and the assets to include; SPY is always added as a benchmark.
154 | 
155 | Optimization: The weights are optimized to maximize the annualized Sharpe ratio computed from historical mean returns and
156 | covariances. This is done using the minimize function from scipy's optimize module, with long-only bounds and a constraint
157 | that the weights sum to one.
158 | 
159 | Output: Finally, the app displays the optimal allocation as a table and a pie chart, plots cumulative portfolio returns
160 | against SPY, and reports an annualized Sharpe ratio using the 3-month Treasury yield (FRED series GS3M) as the risk-free rate.
161 | ''' -------------------------------------------------------------------------------- /5. ML Applications In Finance/02. Asset Management ✅/2. algorithmic_trading✅.py: --------------------------------------------------------------------------------
1 | # Utilizing algorithms and quantitative models to execute trades at optimal prices.
2 | # Algorithmic Trading Script Using Random Forest
3 | 
4 | import pandas as pd
5 | import numpy as np
6 | import matplotlib.pyplot as plt
7 | import yfinance as yf
8 | from sklearn.model_selection import train_test_split
9 | from sklearn.ensemble import RandomForestClassifier
10 | from sklearn.metrics import accuracy_score
11 | 
12 | # Fetching data using yfinance
13 | ticker = 'AAPL'
14 | start_date = '2010-01-01'
15 | end_date = '2023-01-01'
16 | 
17 | data = yf.download(ticker, start=start_date, end=end_date)
18 | 
19 | # Feature Engineering
20 | 
21 | data['Close_Lag1'] = data['Close'].shift(1)
22 | data['Return'] = (data['Close'] - data['Close_Lag1']) / data['Close_Lag1']
23 | data['MA5'] = data['Close'].rolling(window=5).mean()
24 | data['MA10'] = data['Close'].rolling(window=10).mean()
25 | data['MA_Diff'] = data['MA5'] - data['MA10']
26 | data['Momentum'] = data['Close'] - data['Close'].shift(4)
27 | data['Volatility'] = data['Return'].rolling(window=5).std()
28 | data['Next_Return'] = data['Return'].shift(-1)  # return realized on the following day
29 | data['Target'] = (data['Next_Return'] > 0).astype(int)  # label = NEXT day's direction, so features at day t never contain the answer
30 | data.dropna(inplace=True)  # drops rolling-window NaNs and the final row, whose next-day label is unknown
31 | 
32 | features = ['Close', 'Close_Lag1', 'MA5', 'MA10', 'MA_Diff', 'Momentum', 'Volatility']
33 | X = data[features]
34 | y = data['Target']
35 | 
36 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)
37 | 
38 | rf_clf = RandomForestClassifier(n_estimators=100)
39 | rf_clf.fit(X_train, y_train)
40 | 
41 | y_pred = rf_clf.predict(X_test)
42 | accuracy = accuracy_score(y_test, y_pred)
43 | print(f"Accuracy [Random Forest]: {accuracy:.2f}")
44 | 
45 | data['RF_Predicted_Signal'] = np.nan
46 | data.iloc[(len(data) - len(y_pred)):, data.columns.get_loc('RF_Predicted_Signal')] = y_pred
47 | data['RF_Strategy_Return'] = data['Next_Return'] * (data['RF_Predicted_Signal'] * 2 - 1)  # position (+1 long / -1 short) applied to the next day's return
48 | data['RF_Cumulative_Strategy_Returns'] = (1 + data['RF_Strategy_Return']).cumprod()
49 | 
50 | # Benchmark over the same test window, so both curves start from 1
51 | test_mask = data['RF_Predicted_Signal'].notna()
52 | data.loc[test_mask, 'Cumulative_Market_Returns'] = (1 + data.loc[test_mask, 'Next_Return']).cumprod()
53 | 
54 | # Plot
55 | fig, ax = plt.subplots(figsize=(15, 10))
56 | 
57 | # Plot stock prices
58 | ax.plot(data.index, data['Close'], color='g', label='Stock Price', alpha=0.5)
59 | 
60 | # Plot strategy and market returns
61 | test_data_start = data.iloc[len(data) - len(y_pred):].index[0]
62 | ax.plot(data.loc[test_data_start:]['RF_Cumulative_Strategy_Returns'], color='b', label='Random Forest Strategy Returns')
63 | ax.plot(data.loc[test_data_start:]['Cumulative_Market_Returns'], color='r', label='Buy and Hold Returns')
64 | ax.legend(loc="upper left")
65 | ax.set_ylabel('Value')
66 | 
67 | plt.tight_layout()
68 | plt.show()
69 | -------------------------------------------------------------------------------- /5. ML Applications In Finance/02. Asset Management ✅/3. index_tracking✅.py: --------------------------------------------------------------------------------
1 | # Algorithms to create a portfolio that closely follows a particular index.
2 | 
3 | '''
4 | Creating a portfolio that tracks an index is the basis for index funds and ETFs. One way to do this is by finding the
5 | optimal weights of the stocks in the index such that the tracking error is minimized.
6 | 
7 | In this script, I'll outline the following steps:
8 | 
9 | 1. Fetch the historical data of stocks in the index using yfinance.
10 | 2. Calculate the returns of each stock.
11 | 3. Define an optimization problem to find the optimal weights that minimize the tracking error.
12 | 4. Plot the actual index performance vs. our portfolio's performance.
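13 | 
14 | The objective minimized below is the tracking error TE(w) = sum_t (sum_i w_i * r_{i,t} - r_{index,t})^2,
15 | the sum of squared daily differences between the weighted portfolio return and the index return.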
16 | '''
17 | 
18 | import yfinance as yf
19 | import numpy as np
20 | import pandas as pd
21 | import matplotlib.pyplot as plt
22 | from scipy.optimize import minimize
23 | 
24 | # 1. Fetch Data
25 | # Let's say we want to track the S&P 500. We'll take a subset of companies for simplicity.
26 | tickers = ['AAPL', 'MSFT', 'GOOGL', '^GSPC']  # ^GSPC is the ticker for S&P 500
27 | data = yf.download(tickers, start="2020-01-01", end="2023-01-01")['Adj Close']
28 | 
29 | # 2. Calculate Returns
30 | returns = data.pct_change().dropna()
31 | 
32 | # 3. Optimization
33 | def tracking_error(weights: np.array) -> float:
34 |     # Calculate the portfolio returns given the weights
35 |     port_returns = returns.iloc[:, :-1].dot(weights)
36 |     # Calculate the tracking error
37 |     error = np.sum((port_returns - returns['^GSPC'])**2)
38 |     return error
39 | 
40 | # Constraints and bounds
41 | cons = ({'type': 'eq', 'fun': lambda x: np.sum(x) - 1})
42 | bounds = [(0, 1) for _ in range(returns.shape[1] - 1)]  # the number of stocks minus one for the S&P 500 index column
43 | 
44 | # Minimize the tracking error, starting from equal weights across the tracked stocks
45 | initial_guess = [1. / (len(tickers) - 1) for _ in tickers[:-1]]
46 | result = minimize(tracking_error, initial_guess, bounds=bounds, constraints=cons)
47 | 
48 | # Extract the optimal weights
49 | optimal_weights = result.x
50 | 
51 | # 4. Plot
52 | # Calculate portfolio with optimal weights
53 | data['Portfolio'] = data.iloc[:, :-1].dot(optimal_weights)
54 | normalized_data = data / data.iloc[0]  # Normalize data for better visualization
55 | 
56 | plt.figure(figsize=(14, 7))
57 | normalized_data['^GSPC'].plot(label='S&P 500')
58 | normalized_data['Portfolio'].plot(label='Tracked Portfolio')
59 | plt.title('Index Tracking')
60 | plt.xlabel('Date')
61 | plt.ylabel('Normalized Value')
62 | plt.legend()
63 | plt.grid(True)
64 | plt.show()
65 | 
66 | # Display optimal weights
67 | print("Optimal Weights:", optimal_weights)
68 | -------------------------------------------------------------------------------- /5. ML Applications In Finance/02. Asset Management ✅/4. pairs_trading✅.py: --------------------------------------------------------------------------------
1 | # Identifying pairs of assets whose prices have a statistical relationship, used for arbitrage
2 | 
3 | '''
4 | Pairs trading is a strategy that identifies pairs of assets (typically stocks) whose prices are historically correlated.
5 | When their prices deviate substantially, one stock is shorted while the other is bought, with the expectation that the two prices will converge again.
6 | 
7 | Here's a basic outline for a Pairs Trading strategy:
8 | 
9 | 1. Data Collection: Fetch historical data for a set of potential pairs.
10 | 2. Pair Selection: Identify pairs with a strong statistical relationship.
11 | 3. Signal Generation: Determine entry (long/short) and exit points based on a Z-score of the spread.
12 | 4. Trade Execution & Management: Execute the trades and manage the positions.
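13 | 
14 | Concretely, the trading signal below is the z-score of the spread s_t = S1_t - S2_t, i.e. z_t = (s_t - mean(s)) / std(s);
15 | we enter when z_t crosses the entry threshold (long if z < -1.5, short if z > +1.5) and exit once |z_t| < 0.5.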
16 | '''
17 | 
18 | import yfinance as yf
19 | import numpy as np
20 | import pandas as pd
21 | import matplotlib.pyplot as plt
22 | from statsmodels.tsa.stattools import coint
23 | 
24 | # 1. Data Collection
25 | tickers = ['DAL', 'AAL', 'UAL', 'LUV']  # airline stocks as an example
26 | data = yf.download(tickers, start="2020-01-01", end="2023-01-01")['Adj Close']
27 | data = data.ffill()  # forward-fill missing values (fillna(method='ffill') is deprecated in newer pandas)
28 | 
29 | # 2. Pair Selection
30 | def find_cointegrated_pairs(data, pvalue_threshold=0.1):  # Adjusted threshold
31 |     n = data.shape[1]
32 |     score_matrix = np.zeros((n, n))
33 |     pvalue_matrix = np.ones((n, n))
34 |     keys = data.keys()
35 |     pairs = []
36 |     for i in range(n):
37 |         for j in range(i+1, n):
38 |             S1 = data[keys[i]]
39 |             S2 = data[keys[j]]
40 |             result = coint(S1, S2)
41 |             score = result[0]
42 |             pvalue = result[1]
43 |             score_matrix[i, j] = score
44 |             pvalue_matrix[i, j] = pvalue
45 |             if pvalue < pvalue_threshold:  # P-value threshold
46 |                 pairs.append((keys[i], keys[j]))
47 |     return score_matrix, pvalue_matrix, pairs
48 | 
49 | _, _, pairs = find_cointegrated_pairs(data)
50 | print("Cointegrated pairs:", pairs)
51 | 
52 | # Check if we have any cointegrated pairs before proceeding
53 | if not pairs:
54 |     print("No cointegrated pairs found!")
55 |     exit()
56 | 
57 | # For demonstration, let's use the first cointegrated pair.
58 | S1 = data[pairs[0][0]]
59 | S2 = data[pairs[0][1]]
60 | 
61 | # Calculate the spread
62 | spread = S1 - S2
63 | spread_mean = spread.mean()
64 | spread_std = spread.std()
65 | 
66 | # 3. Signal Generation
67 | zscore = (spread - spread_mean) / spread_std
68 | entry_threshold = 1.5
69 | exit_threshold = 0.5
70 | 
71 | # Go long the spread when the z-score crosses down through -entry_threshold;
72 | # go short when it crosses up through +entry_threshold; exit once |z-score| < exit_threshold.
73 | longs = (zscore < -entry_threshold) & (zscore.shift(1) > -entry_threshold)
74 | shorts = (zscore > entry_threshold) & (zscore.shift(1) < entry_threshold)
75 | exits = (np.abs(zscore) < exit_threshold)
76 | 
77 | # 4. Plotting
78 | plt.figure(figsize=(15,7))
79 | 
80 | S1[longs].plot(marker='^', markersize=10, color='g', linestyle='None', alpha=0.7, label='Buy Signal')
81 | S1[shorts].plot(marker='v', markersize=10, color='r', linestyle='None', alpha=0.7, label='Sell Signal')
82 | S1[exits].plot(marker='o', markersize=6, color='b', linestyle='None', alpha=0.7, label='Exit Signal')
83 | S1.plot(color='b', label=pairs[0][0])
84 | S2.plot(color='c', label=pairs[0][1])
85 | plt.xlabel('Date')
86 | plt.ylabel('Price')
87 | plt.title('Pairs Trading')
88 | plt.legend()  # labels are attached to each artist above, so the legend maps markers to the right names
89 | plt.show()
90 | 
91 | -------------------------------------------------------------------------------- /5. ML Applications In Finance/02. Asset Management ✅/requirements.txt: --------------------------------------------------------------------------------
1 | streamlit
2 | yfinance
3 | numpy
4 | pandas
5 | matplotlib
6 | scipy
7 | pandas_datareader -------------------------------------------------------------------------------- /5. ML Applications In Finance/03. Market Analysis And Prediction/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manujajay/machinelearning4finance/7a315e218c211633e28f6ed3de3bdde1de5e09d3/5. ML Applications In Finance/03. Market Analysis And Prediction/.DS_Store -------------------------------------------------------------------------------- /5. ML Applications In Finance/03. Market Analysis And Prediction/1. 
price_forecasting.py: -------------------------------------------------------------------------------- 1 | # Using time-series models to predict future stock prices or market trends. -------------------------------------------------------------------------------- /5. ML Applications In Finance/03. Market Analysis And Prediction/2. sentiment_analysis.py: -------------------------------------------------------------------------------- 1 | # Analyzing news articles or social media to gauge market sentiment. -------------------------------------------------------------------------------- /5. ML Applications In Finance/03. Market Analysis And Prediction/3. option_pricing.py: -------------------------------------------------------------------------------- 1 | # Using computational methods to fair-value options and other derivatives. -------------------------------------------------------------------------------- /5. ML Applications In Finance/03. Market Analysis And Prediction/4. order_flow_prediction.py: -------------------------------------------------------------------------------- 1 | # Predicting the future order flow (buy/sell) based on existing order books. 2 | -------------------------------------------------------------------------------- /5. ML Applications In Finance/04. Customer Service/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manujajay/machinelearning4finance/7a315e218c211633e28f6ed3de3bdde1de5e09d3/5. ML Applications In Finance/04. Customer Service/.DS_Store -------------------------------------------------------------------------------- /5. ML Applications In Finance/04. Customer Service/1. chatbots.py: -------------------------------------------------------------------------------- 1 | # Automated conversational agents for customer service. -------------------------------------------------------------------------------- /5. ML Applications In Finance/04. Customer Service/2. personal_finance_management.py: -------------------------------------------------------------------------------- 1 | # Recommender systems for personalized financial planning and product recommendations. 2 | 3 | -------------------------------------------------------------------------------- /5. ML Applications In Finance/04. Customer Service/3. customer_segmentation.py: -------------------------------------------------------------------------------- 1 | # Identifying different customer segments to offer tailored products. -------------------------------------------------------------------------------- /5. ML Applications In Finance/05. Compliance and Regulatory/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manujajay/machinelearning4finance/7a315e218c211633e28f6ed3de3bdde1de5e09d3/5. ML Applications In Finance/05. Compliance and Regulatory/.DS_Store -------------------------------------------------------------------------------- /5. ML Applications In Finance/05. Compliance and Regulatory/1. anti_money_laundering_AML.py: -------------------------------------------------------------------------------- 1 | # Detecting potentially illegal actions through transaction monitoring. -------------------------------------------------------------------------------- /5. ML Applications In Finance/05. Compliance and Regulatory/2. 
regulatory_reporting_automation.py: -------------------------------------------------------------------------------- 1 | # Automated systems for generating regulatory reports. -------------------------------------------------------------------------------- /5. ML Applications In Finance/05. Compliance and Regulatory/3. insider_trading_detection.py: -------------------------------------------------------------------------------- 1 | # Identifying suspicious trading patterns using anomaly detection. -------------------------------------------------------------------------------- /5. ML Applications In Finance/06. Real Estate/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manujajay/machinelearning4finance/7a315e218c211633e28f6ed3de3bdde1de5e09d3/5. ML Applications In Finance/06. Real Estate/.DS_Store -------------------------------------------------------------------------------- /5. ML Applications In Finance/06. Real Estate/1. property_valuation.py: -------------------------------------------------------------------------------- 1 | # Automated valuation models for real estate pricing. -------------------------------------------------------------------------------- /5. ML Applications In Finance/06. Real Estate/2. investment_analysis.py: -------------------------------------------------------------------------------- 1 | # Evaluating the potential return on investment for different real estate properties. -------------------------------------------------------------------------------- /5. ML Applications In Finance/07. Supply Chain Finance/supply_chain_finance.py: -------------------------------------------------------------------------------- 1 | # Optimization of supply chain processes and financing. -------------------------------------------------------------------------------- /5. ML Applications In Finance/08. Invoice Management/invoice_management.py: -------------------------------------------------------------------------------- 1 | # Automated processing and management of invoices. -------------------------------------------------------------------------------- /5. ML Applications In Finance/09. Cash Management/cash_management.py: -------------------------------------------------------------------------------- 1 | # Algorithms for optimal cash reserves and investment. -------------------------------------------------------------------------------- /5. ML Applications In Finance/10. Decentralized Finance (DEFI)/1.yield_farming_optimizer.py: -------------------------------------------------------------------------------- 1 | # Write a script that interfaces with various DeFi protocols to find the best yield farming opportunities. 2 | # Use APIs to get real-time data on interest rates and automatically suggest or even reallocate assets. -------------------------------------------------------------------------------- /5. ML Applications In Finance/10. Decentralized Finance (DEFI)/2.smart_contract_auditor.py: -------------------------------------------------------------------------------- 1 | # Build a tool to evaluate the security and efficiency of smart contracts on blockchain platforms like Ethereum. 2 | # Utilize formal verification or static analysis libraries to identify vulnerabilities. -------------------------------------------------------------------------------- /5. ML Applications In Finance/11. 
Environmental Social And Governance Investing (ESG)/1.sustainability_analytics.py: -------------------------------------------------------------------------------- 1 | # Use Natural Language Processing (NLP) to analyze a company's sustainability reports and practices. 2 | # Quantify metrics and present them in a user-friendly dashboard. -------------------------------------------------------------------------------- /5. ML Applications In Finance/11. Environmental Social And Governance Investing (ESG)/2.impact_measurement.py: -------------------------------------------------------------------------------- 1 | # Develop a model to calculate the environmental and social impact of a company based on publicly available data, 2 | # such as emissions reports and community involvement. -------------------------------------------------------------------------------- /5. ML Applications In Finance/12. Behavioural Economics/1.nudges.py: -------------------------------------------------------------------------------- 1 | # Design a Python script that integrates with personal finance apps to provide real-time "nudges" 2 | # to encourage saving or responsible spending based on user behavior. -------------------------------------------------------------------------------- /5. ML Applications In Finance/12. Behavioural Economics/2.investor_sentiment_models.py: -------------------------------------------------------------------------------- 1 | # Use machine learning to analyze sentiment data from social media platforms and news articles to model investor sentiment. 2 | # See how sentiment correlates with stock price movements. -------------------------------------------------------------------------------- /5. ML Applications In Finance/13. Blockchain And Cryptocurrency/cryptocurrency_price_prediction.py: -------------------------------------------------------------------------------- 1 | # Utilize machine learning models like LSTM to predict cryptocurrency prices based on historical data and other indicators. -------------------------------------------------------------------------------- /5. ML Applications In Finance/14. Explainable AI For Finance/model_interpretability.py: -------------------------------------------------------------------------------- 1 | # Create a machine learning model for predicting stock prices or credit scores, and incorporate SHAP (Shapley Additive exPlanations) 2 | # or LIME (Local Interpretable Model-agnostic Explanations) for interpretability. -------------------------------------------------------------------------------- /5. ML Applications In Finance/15. Robotic Process Automation (RPA)/automated_invoice_processing.py: -------------------------------------------------------------------------------- 1 | # Use Optical Character Recognition (OCR) to read invoices and input data into a database or accounting software. -------------------------------------------------------------------------------- /5. ML Applications In Finance/16. Textual And Alternative Data For Finance/news_analytics.py: -------------------------------------------------------------------------------- 1 | # Create a real-time dashboard that uses NLP to analyze financial news for keywords and sentiments that could be trading signals. -------------------------------------------------------------------------------- /5. ML Applications In Finance/16. 
Textual And Alternative Data For Finance/reddit_sentiment_and_market_trends.py: -------------------------------------------------------------------------------- 1 | # Scrape Reddit data to perform sentiment analysis and correlate this with stock or cryptocurrency trends. -------------------------------------------------------------------------------- /5. ML Applications In Finance/17. Fundamental Anaysis/automated_10kand10Q_parser.py: -------------------------------------------------------------------------------- 1 | # Write a script that uses NLP to automatically extract and summarize key financial metrics and textual insights from companies' 2 | # 10-K and 10-Q reports. -------------------------------------------------------------------------------- /5. ML Applications In Finance/17. Fundamental Anaysis/financial_ratios_dashboard.py: -------------------------------------------------------------------------------- 1 | # Develop a dashboard that displays crucial financial ratios, automatically calculated from a company's balance sheet, 2 | # income statement, and cash flow statement. -------------------------------------------------------------------------------- /5. ML Applications In Finance/18. Satellite Image Analysis For Finance/agricultural_yield_prediction.py: -------------------------------------------------------------------------------- 1 | # Analyze satellite images for signs of crop health and size. These metrics can provide insights into future commodity prices. -------------------------------------------------------------------------------- /5. ML Applications In Finance/18. Satellite Image Analysis For Finance/disaster_impact_assessment.py: -------------------------------------------------------------------------------- 1 | # Use satellite images pre and post-natural disasters to assess the impact on infrastructure, agriculture, and local economies, 2 | # which can significantly affect market conditions. -------------------------------------------------------------------------------- /5. ML Applications In Finance/18. Satellite Image Analysis For Finance/natural_resource_exploration.py: -------------------------------------------------------------------------------- 1 | # Analyze satellite images to identify new or depleted natural resources like forests, water bodies, or mineral deposits, 2 | # which can be crucial information for investing in relevant sectors. -------------------------------------------------------------------------------- /5. ML Applications In Finance/18. Satellite Image Analysis For Finance/real_estate_development_monitoring.py: -------------------------------------------------------------------------------- 1 | # Use time-lapsed satellite images to monitor construction and development activity in specific geographical regions. 2 | # This can provide insights into real estate markets and housing prices. -------------------------------------------------------------------------------- /5. ML Applications In Finance/18. Satellite Image Analysis For Finance/retail_traffic_analysis.py: -------------------------------------------------------------------------------- 1 | # Use satellite imagery to count cars in the parking lots of retail stores. This can serve as an alternative data source 2 | # for estimating store popularity, sales, or even economic trends in a given area. -------------------------------------------------------------------------------- /5. ML Applications In Finance/18. 
Satellite Image Analysis For Finance/shipping_activity.py: -------------------------------------------------------------------------------- 1 | # Monitor shipping lanes and ports to gauge activity levels, which could be indicative of economic health 2 | # or trade flow between countries. -------------------------------------------------------------------------------- /5. ML Applications In Finance/18. Satellite Image Analysis For Finance/tech_stack.txt: -------------------------------------------------------------------------------- 1 | 1. Satellite Image Providers: Platforms like Sentinel Hub, NASA EarthData, or even Google Earth Engine can provide the raw satellite images 2 | you'd need. 3 | 2. Image Processing Libraries: OpenCV for basic image processing tasks, or specialized libraries like Rasterio for geospatial data. 4 | 3. Machine Learning Libraries: TensorFlow or PyTorch for any predictive models you might want to develop. 5 | 4. Data Visualization: Libraries like Matplotlib for static charts or Dash/Plotly for interactive dashboards. -------------------------------------------------------------------------------- /5. ML Applications In Finance/19. Venture Capital/cap_table_simulation.py: -------------------------------------------------------------------------------- 1 | # Create a tool that allows for the simulation of various funding rounds and exits, showing how ownership and dilution 2 | # evolve over time. -------------------------------------------------------------------------------- /5. ML Applications In Finance/19. Venture Capital/investment_thesis_generator.py: -------------------------------------------------------------------------------- 1 | # Use NLP to analyze a large corpus of successful investment theses and presentations to generate a template or 2 | # even a first draft for a new investment thesis. -------------------------------------------------------------------------------- /5. ML Applications In Finance/19. Venture Capital/portfolio_monitoring.py: -------------------------------------------------------------------------------- 1 | # Build an application that tracks key performance indicators (KPIs) of a venture capital portfolio, 2 | # such as customer growth rate, churn, and lifetime value. You can use Python libraries like Dash or Streamlit 3 | # for interactive dashboards. -------------------------------------------------------------------------------- /5. ML Applications In Finance/19. Venture Capital/startup_scouting_dashboard.py: -------------------------------------------------------------------------------- 1 | # Create a dashboard that aggregates information from various sources like Crunchbase, Twitter, and academic journals 2 | # to identify promising startups for investment. You could use web scraping and NLP techniques to get this data. -------------------------------------------------------------------------------- /5. ML Applications In Finance/20. Private Equity/buyout_model_automation.py: -------------------------------------------------------------------------------- 1 | # Implement a script that automates the generation of a Leveraged Buyout (LBO) model based on user input or scraped data. 2 | # This can provide quick valuation estimates for potential acquisitions. -------------------------------------------------------------------------------- /5. ML Applications In Finance/20. 
Private Equity/deal_sourcing.py: -------------------------------------------------------------------------------- 1 | # Use machine learning algorithms to predict which companies are most likely to be open to a buyout or investment, 2 | # based on features like financial metrics, management changes, or market conditions. -------------------------------------------------------------------------------- /5. ML Applications In Finance/20. Private Equity/due_dilligence_automation.py: -------------------------------------------------------------------------------- 1 | # Streamline the due diligence process by scraping data and reports related to a target company or industry. 2 | # This could include financial data, regulatory filings, news mentions, and more. -------------------------------------------------------------------------------- /5. ML Applications In Finance/20. Private Equity/esg_integration.py: -------------------------------------------------------------------------------- 1 | # Create a tool that assesses potential and current investments for their Environmental, Social, and Governance (ESG) impact, 2 | # potentially using Natural Language Processing to scan through company reports and news articles for relevant information. -------------------------------------------------------------------------------- /5. ML Applications In Finance/20. Private Equity/post_acquisition_value_creation.py: -------------------------------------------------------------------------------- 1 | # Build models that can forecast the impact of various operational improvements on a company's financials, 2 | # helping to inform post-acquisition strategies. -------------------------------------------------------------------------------- /5. ML Applications In Finance/21. VC & PE General Tools/investor_matching.py: -------------------------------------------------------------------------------- 1 | # An algorithm that matches startups with potential investors based on investor preference, startup sector, stage, and other factors. 2 | 3 | -------------------------------------------------------------------------------- /5. ML Applications In Finance/21. VC & PE General Tools/sector_trend_analysis.py: -------------------------------------------------------------------------------- 1 | # Create an algorithm that uses machine learning to identify emerging sectors or trends based on news articles, 2 | # patent filings, academic papers, or market data. -------------------------------------------------------------------------------- /5. ML Applications In Finance/21. VC & PE General Tools/sentiment_analysis_for_foundersandexecutives.py: -------------------------------------------------------------------------------- 1 | # Use Natural Language Processing to analyze interviews, podcasts, or social media interactions involving company founders 2 | # or executives to gauge leadership quality and public perception. -------------------------------------------------------------------------------- /5. ML Applications In Finance/21. VC & PE General Tools/valuation_multiples_benchmarking.py: -------------------------------------------------------------------------------- 1 | # A tool that collects and analyzes valuation multiples for companies in similar industries or stages 2 | # to provide a comparative benchmark. -------------------------------------------------------------------------------- /5. ML Applications In Finance/22. 
-------------------------------------------------------------------------------- /5. ML Applications In Finance/22. Investment Banking/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manujajay/machinelearning4finance/7a315e218c211633e28f6ed3de3bdde1de5e09d3/5. ML Applications In Finance/22. Investment Banking/.DS_Store
-------------------------------------------------------------------------------- /5. ML Applications In Finance/22. Investment Banking/1. mergers_and_acquisitions_M&A/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manujajay/machinelearning4finance/7a315e218c211633e28f6ed3de3bdde1de5e09d3/5. ML Applications In Finance/22. Investment Banking/1. mergers_and_acquisitions_M&A/.DS_Store
-------------------------------------------------------------------------------- /5. ML Applications In Finance/22. Investment Banking/1. mergers_and_acquisitions_M&A/deal_comparator.py: -------------------------------------------------------------------------------- 1 | # Build a tool to compare past M&A deals based on various metrics like deal size, industry, and financial ratios, 2 | # to gauge the attractiveness of a potential new deal.
-------------------------------------------------------------------------------- /5. ML Applications In Finance/22. Investment Banking/1. mergers_and_acquisitions_M&A/manda_target_screening.py: -------------------------------------------------------------------------------- 1 | # Develop a script that scans financial databases to identify companies that meet specific M&A criteria, 2 | # such as EBITDA margins, revenue growth, or market cap.
-------------------------------------------------------------------------------- /5. ML Applications In Finance/22. Investment Banking/1. mergers_and_acquisitions_M&A/synergy_estimator.py: -------------------------------------------------------------------------------- 1 | # Create a tool that uses historical data to estimate the potential synergies between two merging companies, 2 | # including cost-saving and revenue synergies.
-------------------------------------------------------------------------------- /5. ML Applications In Finance/22. Investment Banking/2. valuation_and_financial_modeling/automated_dcf_model.py: -------------------------------------------------------------------------------- 1 | # Create a tool that can automatically populate a discounted cash flow (DCF) model based on financial statement data, 2 | # offering a quick way to get a valuation estimate.
-------------------------------------------------------------------------------- /5. ML Applications In Finance/22. Investment Banking/2. valuation_and_financial_modeling/capital_structure_optimizer.py: -------------------------------------------------------------------------------- 1 | # Build an algorithm to find the optimal capital structure for a company to minimize its cost of capital.
-------------------------------------------------------------------------------- /5. ML Applications In Finance/22. Investment Banking/2. valuation_and_financial_modeling/comparable_company_analysis.py: -------------------------------------------------------------------------------- 1 | # Implement a script that scrapes market data to perform a comparable company analysis, which is often used for valuation 2 | # in investment banking.
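3 |
4 | # A minimal sketch using yfinance (already a dependency of this repo) rather than
5 | # scraping. Yahoo's `info` payload is not guaranteed to carry every field for
6 | # every ticker, hence the defensive .get() calls; the tickers here are arbitrary.
7 | import yfinance as yf
8 |
9 | def comp_table(tickers):
10 |     rows = []
11 |     for symbol in tickers:
12 |         info = yf.Ticker(symbol).info
13 |         rows.append({
14 |             "ticker": symbol,
15 |             "market_cap": info.get("marketCap"),
16 |             "trailing_pe": info.get("trailingPE"),
17 |             "ev_to_ebitda": info.get("enterpriseToEbitda"),
18 |         })
19 |     return rows
20 |
21 | if __name__ == "__main__":
22 |     for row in comp_table(["MSFT", "ORCL", "SAP"]):
23 |         print(row)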
-------------------------------------------------------------------------------- /5. ML Applications In Finance/22. Investment Banking/2. valuation_and_financial_modeling/wacc_calculator.py: -------------------------------------------------------------------------------- 1 | # A tool to automatically calculate the Weighted Average Cost of Capital (WACC), a key metric in many valuation models.
-------------------------------------------------------------------------------- /5. ML Applications In Finance/22. Investment Banking/3. risk_management/credit_risk_assessment.py: -------------------------------------------------------------------------------- 1 | # Develop a machine learning model that evaluates the credit risk associated with corporate loans or bonds.
-------------------------------------------------------------------------------- /5. ML Applications In Finance/22. Investment Banking/3. risk_management/foreign_exchange_risk_management.py: -------------------------------------------------------------------------------- 1 | # Implement a tool to simulate various hedging strategies for managing foreign exchange risk in cross-border deals.
-------------------------------------------------------------------------------- /5. ML Applications In Finance/22. Investment Banking/4. ipo_process/ipo_valuation_model.py: -------------------------------------------------------------------------------- 1 | # Create a model to estimate the potential valuation of a company considering an initial public offering (IPO).
-------------------------------------------------------------------------------- /5. ML Applications In Finance/22. Investment Banking/4. ipo_process/roadshow_presentation_generator.py: -------------------------------------------------------------------------------- 1 | # Use Natural Language Processing (NLP) to assist in generating the textual content for IPO roadshow presentations 2 | # based on historical data and key business metrics.
-------------------------------------------------------------------------------- /5. ML Applications In Finance/22. Investment Banking/5. client_and_market_analysis/client_relationship_management_CRM.py: -------------------------------------------------------------------------------- 1 | # Create a lightweight CRM tool tailored for investment banking needs, focusing on tracking interactions, deals, and financial metrics.
-------------------------------------------------------------------------------- /5. ML Applications In Finance/22. Investment Banking/5. client_and_market_analysis/market_sentiment_analysis.py: -------------------------------------------------------------------------------- 1 | # Utilize NLP to gauge market sentiment by analyzing news articles, financial reports, and social media posts 2 | # related to specific industries or deals.
-------------------------------------------------------------------------------- /5. ML Applications In Finance/22. Investment Banking/5. client_and_market_analysis/pitchbook_automation.py: -------------------------------------------------------------------------------- 1 | # Develop a tool to semi-automate the creation of pitchbooks, drawing from a database of slides and charts and 2 | # auto-populating them based on the deal at hand.
-------------------------------------------------------------------------------- /5. ML Applications In Finance/23. trading/algorithmic_trading_bot.py: -------------------------------------------------------------------------------- 1 | # Create a bot that uses technical or statistical signals to make trades.
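2 |
3 | # A minimal sketch of one possible signal, not an executable trading system: a
4 | # simple moving-average crossover on daily closes. The ticker, lookback windows
5 | # and one-year history are arbitrary illustrative choices.
6 | import yfinance as yf
7 |
8 | def crossover_signal(ticker="AAPL", fast=20, slow=50):
9 |     close = yf.download(ticker, period="1y")["Close"]
10 |     fast_ma = float(close.rolling(fast).mean().iloc[-1])
11 |     slow_ma = float(close.rolling(slow).mean().iloc[-1])
12 |     # Long bias when the fast average sits above the slow one, otherwise flat
13 |     return "BUY" if fast_ma > slow_ma else "HOLD/SELL"
14 |
15 | if __name__ == "__main__":
16 |     print(crossover_signal())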
-------------------------------------------------------------------------------- /5. ML Applications In Finance/23. trading/market_maker_simulator.py: -------------------------------------------------------------------------------- 1 | # Build a simulation for a market-making algorithm that quotes buy and sell prices for securities.
-------------------------------------------------------------------------------- /5. ML Applications In Finance/23. trading/orderbook_visualizer.py: -------------------------------------------------------------------------------- 1 | # Implement a real-time visualization of an exchange's order book, which can help traders make better decisions.
-------------------------------------------------------------------------------- /5. ML Applications In Finance/24. Portfolio Management/portfolio_optimizer.py: -------------------------------------------------------------------------------- 1 | # Use optimization algorithms to calculate the best allocation of assets in a portfolio given various constraints.
-------------------------------------------------------------------------------- /5. ML Applications In Finance/24. Portfolio Management/risk_parity_portfolio.py: -------------------------------------------------------------------------------- 1 | # Create a tool that builds a portfolio based on the risk contributions of each asset.
-------------------------------------------------------------------------------- /5. ML Applications In Finance/24. Portfolio Management/tax_efficient_portfolio_rebalancer.py: -------------------------------------------------------------------------------- 1 | # Write a script that suggests how to rebalance a portfolio in a tax-efficient manner.
-------------------------------------------------------------------------------- /5. ML Applications In Finance/25. Asset Management/alpha_beta_analysis.py: -------------------------------------------------------------------------------- 1 | # Develop a tool that calculates alpha and beta for a given portfolio and benchmarks it against a market index.
-------------------------------------------------------------------------------- /5. ML Applications In Finance/25. Asset Management/etf_tracker.py: -------------------------------------------------------------------------------- 1 | # Build a tool that tracks the components of various ETFs and their respective weightings.
-------------------------------------------------------------------------------- /5. ML Applications In Finance/25. Asset Management/performance_attribution_tool.py: -------------------------------------------------------------------------------- 1 | # Create a tool that decomposes the returns of a portfolio into various factors like market, sector, and stock selection.
-------------------------------------------------------------------------------- /5. ML Applications In Finance/26. Wealth Management/estate_planning_tool.py: -------------------------------------------------------------------------------- 1 | # Create a tool that helps individuals plan the efficient transfer of their estate, taking into account tax considerations 2 | # and inheritance laws.
-------------------------------------------------------------------------------- /5. ML Applications In Finance/26. Wealth Management/retirement_planner.py: -------------------------------------------------------------------------------- 1 | # Implement a retirement calculator that projects the future value of retirement funds based 2 | # on various investment options and scenarios.
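3 |
4 | # A minimal sketch of the projection logic only; the balance, contribution and
5 | # return figures below are illustrative assumptions, not financial advice.
6 | def project_balance(balance, annual_contribution, annual_return, years):
7 |     """Compound an initial balance with end-of-year contributions."""
8 |     for _ in range(years):
9 |         balance = balance * (1 + annual_return) + annual_contribution
10 |     return balance
11 |
12 | if __name__ == "__main__":
13 |     for rate in (0.03, 0.05, 0.07):  # compare a few return scenarios
14 |         print(f"{rate:.0%} return: {project_balance(50_000, 12_000, rate, 30):,.0f}")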
-------------------------------------------------------------------------------- /5. ML Applications In Finance/26. Wealth Management/robo_adviser_prototype.py: -------------------------------------------------------------------------------- 1 | # Build a simplified robo-advisor that suggests an asset allocation based on a user's risk tolerance and investment goals.
-------------------------------------------------------------------------------- /5. ML Applications In Finance/27. Multi Asset Risk Model/multi_asset_risk_model.py: -------------------------------------------------------------------------------- 1 | # Develop a risk model that calculates the Value-at-Risk (VaR) and 2 | # Conditional Value-at-Risk (CVaR) across multiple asset classes.
-------------------------------------------------------------------------------- /5. ML Applications In Finance/28. Personal Financial Management App/personal_financial_management.py: -------------------------------------------------------------------------------- 1 | # Implement a full-fledged app that helps individuals manage their investments, budgets, and financial goals.
-------------------------------------------------------------------------------- /Brewfile: -------------------------------------------------------------------------------- 1 | tap "heroku/brew" 2 | tap "homebrew/bundle" 3 | tap "homebrew/core" 4 | brew "autoconf" 5 | brew "automake" 6 | brew "carthage" 7 | brew "chruby" 8 | brew "cmatrix" 9 | brew "openssl@1.1" 10 | brew "ruby" 11 | brew "cocoapods" 12 | brew "coreutils" 13 | brew "doctl" 14 | brew "git" 15 | brew "libksba" 16 | brew "libtool" 17 | brew "minetest" 18 | brew "pkg-config" 19 | brew "ruby@3.0" 20 | brew "zlib" 21 | vscode "Dart-Code.dart-code" 22 | vscode "Dart-Code.flutter" 23 | vscode "georgewfraser.vscode-javac" 24 | vscode "janisdd.vscode-edit-csv" 25 | vscode "ms-python.python" 26 | vscode "ms-python.vscode-pylance" 27 | vscode "ms-toolsai.jupyter" 28 | vscode "ms-toolsai.jupyter-keymap" 29 | vscode "ms-toolsai.jupyter-renderers" 30 | vscode "ms-toolsai.vscode-jupyter-cell-tags" 31 | vscode "ms-toolsai.vscode-jupyter-slideshow" 32 | vscode "ms-vscode.cmake-tools" 33 | vscode "ms-vscode.cpptools" 34 | vscode "ms-vscode.cpptools-extension-pack" 35 | vscode "ms-vscode.cpptools-themes" 36 | vscode "msrvida.vscode-sanddance" 37 | vscode "Nash.awesome-flutter-snippets" 38 | vscode "PKief.material-icon-theme" 39 | vscode "qwtel.sqlite-viewer" 40 | vscode "redhat.java" 41 | vscode "tomoki1207.pdf" 42 | vscode "twxs.cmake" 43 | vscode "VisualStudioExptTeam.intellicode-api-usage-examples" 44 | vscode "VisualStudioExptTeam.vscodeintellicode" 45 | vscode "vscjava.vscode-java-debug" 46 | vscode "vscjava.vscode-java-dependency" 47 | vscode "vscjava.vscode-java-pack" 48 | vscode "vscjava.vscode-java-test" 49 | vscode "vscjava.vscode-maven" 50 | vscode "yy0931.vscode-sqlite3-editor" 51 |
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ⚙️🧬🔐 Machine Learning Models in Finance 💹 🚀🛰️ 2 | 3 | This repository contains various machine learning and deep learning models applicable to the financial domain. 4 | 5 | ## Table of Contents 📖 🔬 6 | 7 | - [1. Models Included](#1-models-included-) 8 | - [2. Dependencies](#2-dependencies-) 9 | - [3. Installation](#3-installation-) 10 | - [4. Data Fetching](#4-data-fetching-) 11 | - [5. Data Preprocessing](#5-data-preprocessing-) 12 | - [6. 
Usage](#6-usage-) 13 | - [7. Models Explained](#7-models-explained-) 14 | - [8. Beyond The Models: Real-World Applications in Finance](#8-beyond-the-models-real-world-applications-in-finance-) 15 | - [9. Disclaimer](#9-disclaimer-) 16 | 17 | ## 1. Models Included 🎹 🔮 18 | 19 | The repository consists of the following categories: 20 | 21 | 1. **Supervised Learning Models** 🤝 🗽 22 | - Linear Regression 23 | - Logistic Regression 24 | - Naive Bayes 25 | - Random Forest 26 | 27 | 2. **Unsupervised Learning Models** 👾 🦽 28 | - Clustering (K-means) 29 | - Dimensionality Reduction (PCA) 30 | 31 | 3. **Deep Learning Models** 📡 ⚓️ 32 | - Supervised Deep Learning Models 33 | - Recurrent Neural Networks (LSTM) 34 | - Convolutional Neural Networks (CNN) 35 | - Unsupervised Deep Learning Models 36 | - Autoencoders 37 | - Generative Adversarial Networks (GANs) 38 | 39 | 4. **Reinforcement Learning Models** 🦾 🚥 40 | - Q-Learning 41 | 42 | ## 2. Dependencies 🥗 🔮 43 | 44 | - Python 3.x 45 | - yfinance 46 | - NumPy 47 | - TensorFlow 48 | - Scikit-learn 49 | 50 | ## 3. Installation 🧶 🔧 51 | 52 | To install all dependencies (optionally inside a conda or Python virtual environment), run: 53 | 54 | ```bash 55 | pip install -r requirements.txt 56 | ``` 57 | 58 | To install only the essentials, run: 59 | 60 | ```bash 61 | pip install yfinance numpy tensorflow scikit-learn 62 | ``` 63 | 64 | ## 4. Data Fetching 🥽 65 | Real-world financial data is fetched using the yfinance library. 66 | 67 | ```python 68 | import yfinance as yf 69 | 70 | def fetch_data(ticker, start_date, end_date): 71 | return yf.download(ticker, start=start_date, end=end_date)['Close'].values 72 | ``` 73 | 74 | ## 5. Data Preprocessing 🎼 75 | 76 | Data is preprocessed into sliding windows of past prices (features) and the next price (target), producing the training and testing datasets that are fed into the machine learning models. 77 | 78 | ```python 79 | import numpy as np 80 | 81 | def create_dataset(data, look_back=1): 82 | X, Y = [], [] 83 | for i in range(len(data) - look_back - 1): 84 | a = data[i:(i + look_back)] 85 | X.append(a) 86 | Y.append(data[i + look_back]) 87 | return np.array(X), np.array(Y) 88 | ``` 89 | 90 | ## 6. Usage 🛬 🛫 91 | 92 | Navigate to the respective folder and run the Python script for the model you're interested in. 93 | 94 | ```bash 95 | python script_name.py 96 | ``` 97 | 98 | ## 7. Models Explained 🗺️ 99 | 100 | ### 1. Supervised Learning Models 🏗️ 101 | 102 | #### 1.1 Linear Regression 🎢 103 | Linear Regression fits a linear equation to the data, providing a straightforward and effective baseline for simple predictive tasks. 104 | ![Linear Regression](./1.%20Supervised%20Learning%20Models/linear_regression_summary_with_explanation.png) 105 | 106 | #### 1.2 Logistic Regression 🛟 107 | Logistic Regression is traditionally a classification algorithm; here the continuous price targets are cast to integer labels so it can be benchmarked alongside the regression models. 108 | ![Logistic Regression](./1.%20Supervised%20Learning%20Models/logistic_regression_summary_with_explanation.png) 109 | 110 | #### 1.3 Naive Bayes ⛱️ 111 | Naive Bayes applies Bayes' theorem under a strong feature-independence assumption and is particularly useful for small datasets. 112 | ![Naive Bayes](./1.%20Supervised%20Learning%20Models/naive_bayes_summary_with_explanation.png) 113 | 114 | #### 1.4 Random Forest 🛤️ 115 | Random Forest combines multiple decision trees into a more robust and accurate prediction model. 116 | ![Random Forest](./1.%20Supervised%20Learning%20Models/random_forest_summary_with_explanation.png) 117 | 
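To tie these supervised models to the data helpers above, here is a condensed sketch of the shared train-and-evaluate loop (mirroring the pattern in `main.py`; the ticker, date range, and `look_back` are arbitrary choices, and `fetch_data`/`create_dataset` are the helpers from sections 4 and 5):

```python
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

data = fetch_data('AAPL', '2020-01-01', '2021-01-01')
X, Y = create_dataset(data, look_back=3)
X = X.reshape(len(X), -1)  # flatten each look-back window into one feature row
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

for name, model in [("Linear Regression", LinearRegression()),
                    ("Random Forest", RandomForestRegressor())]:
    model.fit(X_train, Y_train)
    print(name, "MSE:", mean_squared_error(Y_test, model.predict(X_test)))
```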
118 | ### 2. Unsupervised Learning Models 🛸 119 | 120 | #### 2.1 Clustering (K-means) 🏟️ 121 | K-means clustering is used to partition data into groups based on feature similarity. 122 | ![K-means](./2.%20Unsupervised%20Learning%20Models/kmeans_financial_data_with_explanation.png) 123 | 124 | #### 2.2 Dimensionality Reduction (PCA) 🚧 125 | PCA is used to reduce the number of features in a dataset while retaining the most relevant information. 126 | ![PCA](./2.%20Unsupervised%20Learning%20Models/PCA_financial_data_with_full_explanation.png) 127 | 128 | ### 3. Deep Learning Models 🛰️ 129 | 130 | #### 3.1 Supervised Deep Learning Models 🚉 131 | 132 | ##### 3.1.1 Recurrent Neural Networks (RNNs/LSTM) 🌌 133 | Recurrent Neural Networks, particularly those using Long Short-Term Memory (LSTM) units, are highly effective for sequence prediction problems. In finance, they can be used for time-series forecasting tasks such as stock price prediction. 134 | 135 | ![RNNs/LSTM](./3.%20Deep%20Learning%20Models/Apple_Stock_Price_Prediction.png) 136 | 137 | ##### 3.1.2 Convolutional Neural Networks (CNNs) 📱 138 | Convolutional Neural Networks are primarily used in image recognition but can also be applied in finance for pattern recognition in price charts or for processing alternative data types, such as satellite images for agricultural commodity predictions. 139 | 140 | ![CNNs](./3.%20Deep%20Learning%20Models/Financial_News_Sentiment_Analysis.png) 141 | 142 | #### 3.2 Unsupervised Deep Learning Models 🎛️ 143 | 144 | ##### 3.2.1 Autoencoders 📻 145 | Autoencoders are used for anomaly detection in financial data, identifying unusual patterns that do not conform to expected behavior. 146 | 147 | ![Autoencoders](./3.%20Deep%20Learning%20Models/Anomaly_Detection_Using_Autoencoder.png) 148 | 149 | ##### 3.2.2 Generative Adversarial Networks (GANs) ⏲️ 150 | GANs are used for simulating different market conditions, helping in risk assessment for various investment strategies. 151 | 152 | ![GANs](./3.%20Deep%20Learning%20Models/GAN_Financial_Simulation.png) 153 | 154 | ### 4. Reinforcement Learning Models 🔋 155 | 156 | #### 4.1 Q-Learning 🔌 157 | Q-Learning is a model-free reinforcement learning algorithm, used here for stock trading. 158 | ![Q-Learning](./4.%20Reinforcement%20Learning%20Models/Q_Learning_Stock_Trading_YFinance.png) 159 | 
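At its core, the agent maintains a table of state-action values and nudges each entry toward a bootstrapped target. A minimal sketch of just the update rule (the state and action spaces here are placeholder sizes; the full trading loop lives in `q_learning.py` and `main.py`):

```python
import numpy as np

n_states, n_actions = 100, 3           # placeholder sizes (e.g. Buy, Sell, Hold)
alpha, gamma = 0.1, 0.99               # learning rate and discount factor
Q = np.zeros((n_states, n_actions))

def q_update(state, action, reward, next_state):
    # Move Q(s, a) toward the target r + gamma * max_a' Q(s', a')
    target = reward + gamma * np.max(Q[next_state])
    Q[state, action] += alpha * (target - Q[state, action])
```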
160 | ## 8. Beyond The Models: Real-World Applications in Finance 💸 161 | 162 | In addition to the core machine learning models that form the backbone of this repository, we'll explore practical applications that span various dimensions of the financial sector. Below is a snapshot of the project's tree structure that gives you an idea of what these applications are: 163 | 164 | ``` 165 | 5. ml_applications_in_finance 166 | │ ├── risk_management 167 | │ ├── decentralized_finance_(DEFI) 168 | │ ├── environmental_social_and_governance_investing_(ESG) 169 | │ ├── behavioural_economics 170 | │ ├── blockchain_and_cryptocurrency 171 | │ ├── explainable_AI_for_finance 172 | │ ├── robotic_process_automation_(RPA) 173 | │ ├── textual_and_alternative_data_for_finance 174 | │ ├── fundamental_analysis 175 | │ ├── satellite_image_analysis_for_finance 176 | │ ├── venture_capital 177 | │ ├── asset_management 178 | │ ├── private_equity 179 | │ ├── investment_banking 180 | │ ├── trading 181 | │ ├── portfolio_management 182 | │ ├── wealth_management 183 | │ ├── multi_asset_risk_model 184 | │ ├── personal_financial_management_app 185 | │ ├── market_analysis_and_prediction 186 | │ ├── customer_service 187 | │ ├── compliance_and_regulatory 188 | │ ├── real_estate 189 | │ ├── supply_chain_finance 190 | │ ├── invoice_management 191 | │ └── cash_management 192 | ``` 193 | 194 | From risk management to blockchain and cryptocurrency, from venture capital to investment banking, and from asset management to personal financial management, we aim to cover a wide array of use cases. Each of these applications is backed by one or more of the machine learning models described earlier in the repository. 195 | 196 | **Note**: The list of applications is not exhaustive, and the project is a work in progress. While we aim to continually update it with new techniques and applications, certain modules may be added or removed based on their relevance and effectiveness. 197 | 198 | 199 | ## 9. Disclaimer 💳 200 | 201 | The code provided in this repository is for educational and informational purposes only. It is not intended for live trading or as financial advice. Please exercise caution and conduct your own research before making any investment decisions. 
202 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import yfinance as yf 2 | import numpy as np 3 | import tensorflow as tf 4 | from sklearn.linear_model import LinearRegression, LogisticRegression 5 | from sklearn.naive_bayes import GaussianNB 6 | from sklearn.ensemble import RandomForestRegressor 7 | from sklearn.model_selection import train_test_split 8 | from sklearn.metrics import mean_squared_error 9 | 10 | def fetch_data(ticker, start_date, end_date): 11 | return yf.download(ticker, start=start_date, end=end_date)['Close'].values 12 | 13 | def create_dataset(data, look_back=1): 14 | X, Y = [], [] 15 | for i in range(len(data) - look_back - 1): 16 | a = data[i:(i + look_back)] 17 | X.append(a) 18 | Y.append(data[i + look_back]) 19 | return np.array(X), np.array(Y) 20 | 21 | def linear_regression_model(X_train, Y_train, X_test, Y_test): 22 | model = LinearRegression() 23 | model.fit(X_train, Y_train) 24 | pred = model.predict(X_test) 25 | print("Linear Regression MSE:", mean_squared_error(Y_test, pred)) 26 | 27 | def logistic_regression_model(X_train, Y_train, X_test, Y_test): 28 | model = LogisticRegression() 29 | model.fit(X_train, Y_train.astype('int')) 30 | pred = model.predict(X_test) 31 | print("Logistic Regression MSE:", mean_squared_error(Y_test, pred)) 32 | 33 | def naive_bayes_model(X_train, Y_train, X_test, Y_test): 34 | model = GaussianNB() 35 | model.fit(X_train, Y_train.astype('int')) 36 | pred = model.predict(X_test) 37 | print("Naive Bayes MSE:", mean_squared_error(Y_test, pred)) 38 | 39 | def random_forest_model(X_train, Y_train, X_test, Y_test): 40 | model = RandomForestRegressor() 41 | model.fit(X_train, Y_train) 42 | pred = model.predict(X_test) 43 | print("Random Forest MSE:", mean_squared_error(Y_test, pred)) 44 | 45 | def lstm_model(X_train, Y_train, X_test, Y_test, look_back): 46 | model = tf.keras.models.Sequential([ 47 | tf.keras.layers.LSTM(50, input_shape=(look_back, 1)), 48 | tf.keras.layers.Dense(1) 49 | ]) 50 | model.compile(optimizer='adam', loss='mean_squared_error') 51 | model.fit(X_train, Y_train, epochs=2, batch_size=1) 52 | pred = model.predict(X_test) 53 | print("LSTM MSE:", mean_squared_error(Y_test, pred)) 54 | 55 | def rnn_model(X_train, Y_train, X_test, Y_test, look_back): 56 | model = tf.keras.models.Sequential([ 57 | tf.keras.layers.SimpleRNN(50, input_shape=(look_back, 1)), 58 | tf.keras.layers.Dense(1) 59 | ]) 60 | model.compile(optimizer='adam', loss='mean_squared_error') 61 | model.fit(X_train, Y_train, epochs=2, batch_size=1) 62 | pred = model.predict(X_test) 63 | print("RNN MSE:", mean_squared_error(Y_test, pred)) 64 | 65 | class QLearningAgent: 66 | def __init__(self, states, actions, alpha=0.1, gamma=0.99, epsilon=0.1): 67 | self.states = states 68 | self.actions = actions 69 | self.alpha = alpha 70 | self.gamma = gamma 71 | self.epsilon = epsilon 72 | self.q_table = np.zeros((self.states, len(self.actions))) 73 | 74 | def choose_action(self, state): 75 | if np.random.uniform(0, 1) < self.epsilon: 76 | return np.random.choice(self.actions) 77 | else: 78 | return np.argmax(self.q_table[state, :]) 79 | 80 | def learn(self, state, action, reward, next_state): 81 | predict = self.q_table[state, action] 82 | target = reward + self.gamma * np.max(self.q_table[next_state, :]) 83 | self.q_table[state, action] += self.alpha * (target - predict) 84 | 85 | def reinforcement_learning_q_learning(data, look_back=1): 86 
| n_actions = 3 # Buy, Sell, Hold 87 | agent = QLearningAgent(len(data) - look_back, range(n_actions)) 88 | state = 0 89 | for i in range(0, len(data) - look_back - 1): 90 | state = i 91 | action = agent.choose_action(state) 92 | next_state = state + 1 93 | # Here you can define your own reward function based on the action and price change 94 | reward = data[next_state] - data[state] if action == 0 else 0 # Simplified reward function 95 | agent.learn(state, action, reward, next_state) 96 | 97 | # Predict the last action based on Q-values. 98 | # You can extend this part to make multiple predictions. 99 | final_state = len(data) - look_back - 1 100 | final_action = agent.choose_action(final_state) 101 | return final_action 102 | 103 | def main(): 104 | ticker = 'AAPL' 105 | look_back = 1 106 | data = fetch_data(ticker, '2020-01-01', '2021-01-01') 107 | X, Y = create_dataset(data, look_back) 108 | X = np.reshape(X, (X.shape[0], X.shape[1], 1)) 109 | X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42) 110 | X_train_flat = X_train.reshape(X_train.shape[0], look_back) 111 | X_test_flat = X_test.reshape(X_test.shape[0], look_back) 112 | 113 | model_type = input("Enter the model type (linear_regression, logistic_regression, naive_bayes, random_forest, lstm, rnn, reinforcement_learning_q_learning): ") 114 | 115 | if model_type == 'linear_regression': 116 | linear_regression_model(X_train_flat, Y_train, X_test_flat, Y_test) 117 | elif model_type == 'logistic_regression': 118 | logistic_regression_model(X_train_flat, Y_train, X_test_flat, Y_test) 119 | elif model_type == 'naive_bayes': 120 | naive_bayes_model(X_train_flat, Y_train, X_test_flat, Y_test) 121 | elif model_type == 'random_forest': 122 | random_forest_model(X_train_flat, Y_train, X_test_flat, Y_test) 123 | elif model_type == 'lstm': 124 | lstm_model(X_train, Y_train, X_test, Y_test, look_back) 125 | elif model_type == 'rnn': 126 | rnn_model(X_train, Y_train, X_test, Y_test, look_back) 127 | elif model_type == 'reinforcement_learning_q_learning': 128 | final_action = reinforcement_learning_q_learning(data, look_back) 129 | print(f"Final action suggested by Q-Learning: {['Buy', 'Sell', 'Hold'][final_action]}") 130 | 131 | if __name__ == "__main__": 132 | main() 133 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==1.4.0 2 | altair==5.1.1 3 | anyio @ file:///home/conda/feedstock_root/build_artifacts/anyio_1693488585952/work 4 | appdirs==1.4.4 5 | appnope @ file:///home/conda/feedstock_root/build_artifacts/appnope_1649077682618/work 6 | argon2-cffi @ file:///home/conda/feedstock_root/build_artifacts/argon2-cffi_1692818318753/work 7 | argon2-cffi-bindings @ file:///Users/runner/miniforge3/conda-bld/argon2-cffi-bindings_1666850758378/work 8 | arrow @ file:///home/conda/feedstock_root/build_artifacts/arrow_1662382474514/work 9 | asttokens @ file:///home/conda/feedstock_root/build_artifacts/asttokens_1694046349000/work 10 | astunparse==1.6.3 11 | async-lru @ file:///home/conda/feedstock_root/build_artifacts/async-lru_1690563019058/work 12 | attrs @ file:///home/conda/feedstock_root/build_artifacts/attrs_1683424013410/work 13 | Babel @ file:///home/conda/feedstock_root/build_artifacts/babel_1677767029043/work 14 | backcall @ file:///home/conda/feedstock_root/build_artifacts/backcall_1592338393461/work 15 | backports.functools-lru-cache @ 
file:///home/conda/feedstock_root/build_artifacts/backports.functools_lru_cache_1687772187254/work 16 | beautifulsoup4 @ file:///home/conda/feedstock_root/build_artifacts/beautifulsoup4_1680888073205/work 17 | bleach @ file:///home/conda/feedstock_root/build_artifacts/bleach_1674535352125/work 18 | blinker==1.6.2 19 | Brotli @ file:///Users/runner/miniforge3/conda-bld/brotli-split_1693583678882/work 20 | cached-property @ file:///home/conda/feedstock_root/build_artifacts/cached_property_1615209429212/work 21 | cachetools==5.3.1 22 | certifi==2023.7.22 23 | cffi @ file:///Users/runner/miniforge3/conda-bld/cffi_1671179612308/work 24 | charset-normalizer @ file:///home/conda/feedstock_root/build_artifacts/charset-normalizer_1688813409104/work 25 | click==8.1.7 26 | comm @ file:///home/conda/feedstock_root/build_artifacts/comm_1691044910542/work 27 | contourpy==1.1.0 28 | cycler==0.11.0 29 | debugpy @ file:///Users/runner/miniforge3/conda-bld/debugpy_1694118104592/work 30 | decorator @ file:///home/conda/feedstock_root/build_artifacts/decorator_1641555617451/work 31 | defusedxml @ file:///home/conda/feedstock_root/build_artifacts/defusedxml_1615232257335/work 32 | entrypoints @ file:///home/conda/feedstock_root/build_artifacts/entrypoints_1643888246732/work 33 | exceptiongroup @ file:///home/conda/feedstock_root/build_artifacts/exceptiongroup_1692026125334/work 34 | executing @ file:///home/conda/feedstock_root/build_artifacts/executing_1667317341051/work 35 | fastjsonschema @ file:///home/conda/feedstock_root/build_artifacts/python-fastjsonschema_1690055433477/work/dist 36 | flatbuffers==23.5.26 37 | fonttools==4.42.1 38 | fqdn @ file:///home/conda/feedstock_root/build_artifacts/fqdn_1638810296540/work/dist 39 | fredapi==0.5.1 40 | frozendict==2.3.8 41 | gast==0.4.0 42 | gitdb==4.0.10 43 | GitPython==3.1.36 44 | google-auth==2.22.0 45 | google-auth-oauthlib==1.0.0 46 | google-pasta==0.2.0 47 | grpcio==1.58.0 48 | h5py==3.9.0 49 | html5lib==1.1 50 | idna @ file:///home/conda/feedstock_root/build_artifacts/idna_1663625384323/work 51 | importlib-metadata @ file:///home/conda/feedstock_root/build_artifacts/importlib-metadata_1688754491823/work 52 | importlib-resources @ file:///home/conda/feedstock_root/build_artifacts/importlib_resources_1691408075105/work 53 | ipykernel @ file:///Users/runner/miniforge3/conda-bld/ipykernel_1693880377119/work 54 | ipython @ file:///Users/runner/miniforge3/conda-bld/ipython_1693580003080/work 55 | ipython-genutils==0.2.0 56 | ipywidgets @ file:///home/conda/feedstock_root/build_artifacts/ipywidgets_1690877070294/work 57 | isoduration @ file:///home/conda/feedstock_root/build_artifacts/isoduration_1638811571363/work/dist 58 | jax==0.4.14 59 | jedi @ file:///home/conda/feedstock_root/build_artifacts/jedi_1690896916983/work 60 | Jinja2 @ file:///home/conda/feedstock_root/build_artifacts/jinja2_1654302431367/work 61 | joblib==1.3.2 62 | json5 @ file:///home/conda/feedstock_root/build_artifacts/json5_1688248289187/work 63 | jsonpointer==2.0 64 | jsonschema @ file:///home/conda/feedstock_root/build_artifacts/jsonschema-meta_1691761378595/work 65 | jsonschema-specifications @ file:///home/conda/feedstock_root/build_artifacts/jsonschema-specifications_1689701150890/work 66 | jupyter @ file:///Users/runner/miniforge3/conda-bld/jupyter_1670249893813/work 67 | jupyter-console @ file:///home/conda/feedstock_root/build_artifacts/jupyter_console_1678118109161/work 68 | jupyter-events @ file:///home/conda/feedstock_root/build_artifacts/jupyter_events_1691505939576/work 69 | 
jupyter-lsp @ file:///home/conda/feedstock_root/build_artifacts/jupyter-lsp-meta_1685453365113/work/jupyter-lsp 70 | jupyter_client @ file:///home/conda/feedstock_root/build_artifacts/jupyter_client_1693317508789/work 71 | jupyter_core @ file:///Users/runner/miniforge3/conda-bld/jupyter_core_1686775757864/work 72 | jupyter_server @ file:///home/conda/feedstock_root/build_artifacts/jupyter_server_1693487358826/work 73 | jupyter_server_terminals @ file:///home/conda/feedstock_root/build_artifacts/jupyter_server_terminals_1673491454549/work 74 | jupyterlab @ file:///home/conda/feedstock_root/build_artifacts/jupyterlab_1692015883666/work 75 | jupyterlab-pygments @ file:///home/conda/feedstock_root/build_artifacts/jupyterlab_pygments_1649936611996/work 76 | jupyterlab-widgets @ file:///home/conda/feedstock_root/build_artifacts/jupyterlab_widgets_1688489450369/work 77 | jupyterlab_server @ file:///home/conda/feedstock_root/build_artifacts/jupyterlab_server_1690205927615/work 78 | keras==2.12.0 79 | kiwisolver==1.4.5 80 | libclang==16.0.6 81 | lxml==4.9.3 82 | Markdown==3.4.4 83 | markdown-it-py==3.0.0 84 | MarkupSafe @ file:///Users/runner/miniforge3/conda-bld/markupsafe_1685769179270/work 85 | matplotlib==3.7.2 86 | matplotlib-inline @ file:///home/conda/feedstock_root/build_artifacts/matplotlib-inline_1660814786464/work 87 | mdurl==0.1.2 88 | mistune @ file:///home/conda/feedstock_root/build_artifacts/mistune_1692116650819/work 89 | ml-dtypes==0.2.0 90 | multitasking==0.0.11 91 | nbclient @ file:///home/conda/feedstock_root/build_artifacts/nbclient_1684790896106/work 92 | nbconvert @ file:///home/conda/feedstock_root/build_artifacts/nbconvert-meta_1693331710275/work 93 | nbformat @ file:///home/conda/feedstock_root/build_artifacts/nbformat_1690814868471/work 94 | nest-asyncio @ file:///home/conda/feedstock_root/build_artifacts/nest-asyncio_1664684991461/work 95 | notebook @ file:///home/conda/feedstock_root/build_artifacts/notebook_1693410793506/work 96 | notebook_shim @ file:///home/conda/feedstock_root/build_artifacts/notebook-shim_1682360583588/work 97 | numpy==1.23.5 98 | oauthlib==3.2.2 99 | opt-einsum==3.3.0 100 | overrides @ file:///home/conda/feedstock_root/build_artifacts/overrides_1691338815398/work 101 | packaging @ file:///home/conda/feedstock_root/build_artifacts/packaging_1681337016113/work 102 | pandas @ file:///Users/runner/miniforge3/conda-bld/pandas_1693415364816/work 103 | pandas-datareader==0.10.0 104 | pandocfilters @ file:///home/conda/feedstock_root/build_artifacts/pandocfilters_1631603243851/work 105 | parso @ file:///home/conda/feedstock_root/build_artifacts/parso_1638334955874/work 106 | patsy==0.5.3 107 | pexpect @ file:///home/conda/feedstock_root/build_artifacts/pexpect_1667297516076/work 108 | pickleshare @ file:///home/conda/feedstock_root/build_artifacts/pickleshare_1602536217715/work 109 | Pillow==9.5.0 110 | pkgutil_resolve_name @ file:///home/conda/feedstock_root/build_artifacts/pkgutil-resolve-name_1633981968097/work 111 | platformdirs @ file:///home/conda/feedstock_root/build_artifacts/platformdirs_1690813113769/work 112 | ply==3.11 113 | prometheus-client @ file:///home/conda/feedstock_root/build_artifacts/prometheus_client_1689032443210/work 114 | prompt-toolkit @ file:///home/conda/feedstock_root/build_artifacts/prompt-toolkit_1688565951714/work 115 | protobuf==4.24.3 116 | psutil @ file:///Users/runner/miniforge3/conda-bld/psutil_1681775196112/work 117 | ptyprocess @ 
file:///home/conda/feedstock_root/build_artifacts/ptyprocess_1609419310487/work/dist/ptyprocess-0.7.0-py2.py3-none-any.whl 118 | pure-eval @ file:///home/conda/feedstock_root/build_artifacts/pure_eval_1642875951954/work 119 | pyarrow==13.0.0 120 | pyasn1==0.5.0 121 | pyasn1-modules==0.3.0 122 | pycparser @ file:///home/conda/feedstock_root/build_artifacts/pycparser_1636257122734/work 123 | pydeck==0.8.0 124 | Pygments @ file:///home/conda/feedstock_root/build_artifacts/pygments_1691408637400/work 125 | Pympler==1.0.1 126 | pyobjc-core @ file:///Users/runner/miniforge3/conda-bld/pyobjc-core_1686129336286/work 127 | pyobjc-framework-Cocoa @ file:///Users/runner/miniforge3/conda-bld/pyobjc-framework-cocoa_1686136009200/work 128 | pyparsing==3.0.9 129 | PyQt5==5.15.9 130 | PyQt5-sip==12.12.2 131 | PySocks @ file:///home/conda/feedstock_root/build_artifacts/pysocks_1661604839144/work 132 | python-dateutil @ file:///home/conda/feedstock_root/build_artifacts/python-dateutil_1626286286081/work 133 | python-json-logger @ file:///home/conda/feedstock_root/build_artifacts/python-json-logger_1677079630776/work 134 | pytz @ file:///home/conda/feedstock_root/build_artifacts/pytz_1693930252784/work 135 | pytz-deprecation-shim==0.1.0.post0 136 | PyYAML @ file:///Users/runner/miniforge3/conda-bld/pyyaml_1692737410683/work 137 | pyzmq @ file:///Users/runner/miniforge3/conda-bld/pyzmq_1691667591386/work 138 | qtconsole @ file:///home/conda/feedstock_root/build_artifacts/qtconsole-base_1693604303222/work 139 | QtPy @ file:///home/conda/feedstock_root/build_artifacts/qtpy_1693347765905/work 140 | referencing @ file:///home/conda/feedstock_root/build_artifacts/referencing_1691337268233/work 141 | requests @ file:///home/conda/feedstock_root/build_artifacts/requests_1684774241324/work 142 | requests-oauthlib==1.3.1 143 | rfc3339-validator @ file:///home/conda/feedstock_root/build_artifacts/rfc3339-validator_1638811747357/work 144 | rfc3986-validator @ file:///home/conda/feedstock_root/build_artifacts/rfc3986-validator_1598024191506/work 145 | rich==13.5.3 146 | rpds-py @ file:///Users/runner/miniforge3/conda-bld/rpds-py_1693850362535/work 147 | rsa==4.9 148 | scikit-learn==1.3.0 149 | scipy==1.11.2 150 | seaborn==0.12.2 151 | Send2Trash @ file:///Users/runner/miniforge3/conda-bld/send2trash_1682601407921/work 152 | sip @ file:///Users/runner/miniforge3/conda-bld/sip_1690986115414/work 153 | six @ file:///home/conda/feedstock_root/build_artifacts/six_1620240208055/work 154 | smmap==5.0.1 155 | sniffio @ file:///home/conda/feedstock_root/build_artifacts/sniffio_1662051266223/work 156 | soupsieve @ file:///home/conda/feedstock_root/build_artifacts/soupsieve_1693929250441/work 157 | stack-data @ file:///home/conda/feedstock_root/build_artifacts/stack_data_1669632077133/work 158 | statsmodels==0.14.0 159 | streamlit==1.26.0 160 | tenacity==8.2.3 161 | tensorboard==2.12.3 162 | tensorboard-data-server==0.7.1 163 | tensorflow==2.13.0 164 | tensorflow-estimator==2.12.0 165 | tensorflow-io-gcs-filesystem==0.34.0 166 | tensorflow-macos==2.12.0 167 | tensorflow-metal==1.0.0 168 | termcolor==2.3.0 169 | terminado @ file:///Users/runner/miniforge3/conda-bld/terminado_1670254106711/work 170 | threadpoolctl==3.2.0 171 | tinycss2 @ file:///home/conda/feedstock_root/build_artifacts/tinycss2_1666100256010/work 172 | toml @ file:///home/conda/feedstock_root/build_artifacts/toml_1604308577558/work 173 | tomli @ file:///home/conda/feedstock_root/build_artifacts/tomli_1644342247877/work 174 | toolz==0.12.0 175 | tornado @ 
file:///Users/runner/miniforge3/conda-bld/tornado_1692311824797/work 176 | traitlets @ file:///home/conda/feedstock_root/build_artifacts/traitlets_1675110562325/work 177 | typing-utils @ file:///home/conda/feedstock_root/build_artifacts/typing_utils_1622899189314/work 178 | typing_extensions==4.5.0 179 | tzdata @ file:///home/conda/feedstock_root/build_artifacts/python-tzdata_1680081134351/work 180 | tzlocal==4.3.1 181 | uri-template @ file:///home/conda/feedstock_root/build_artifacts/uri-template_1688655812972/work/dist 182 | urllib3 @ file:///home/conda/feedstock_root/build_artifacts/urllib3_1689789803562/work 183 | validators==0.22.0 184 | wcwidth @ file:///home/conda/feedstock_root/build_artifacts/wcwidth_1673864653149/work 185 | webcolors @ file:///home/conda/feedstock_root/build_artifacts/webcolors_1679900785843/work 186 | webencodings==0.5.1 187 | websocket-client @ file:///home/conda/feedstock_root/build_artifacts/websocket-client_1692730992302/work 188 | Werkzeug==2.3.7 189 | widgetsnbextension @ file:///home/conda/feedstock_root/build_artifacts/widgetsnbextension_1688504439014/work 190 | wrapt==1.14.1 191 | yfinance==0.2.28 192 | zipp @ file:///home/conda/feedstock_root/build_artifacts/zipp_1689374466814/work 193 | --------------------------------------------------------------------------------