├── .gitignore ├── 0. Data Prep └── data_prep.py ├── 1. Supervised Learning Models ├── .DS_Store ├── 1. linear_regression.py ├── 2. logistic_regression_model.py ├── 3. naive_bayes_model.py ├── 4. random_forest_model.py ├── README.md ├── linear_regression_summary_with_explanation.png ├── logistic_regression_summary_with_explanation.png ├── naive_bayes_summary_with_explanation.png └── random_forest_summary_with_explanation.png ├── 2. Unsupervised Learning Models ├── .DS_Store ├── 1. clustering.py ├── 2. dimensionality_reduction.py ├── PCA_financial_data_with_full_explanation.png ├── README.md └── kmeans_financial_data_with_explanation.png ├── 3. Deep Learning Models ├── .DS_Store ├── Anomaly_Detection_Using_Autoencoder.png ├── Apple_Stock_Price_Prediction.png ├── Financial_News_Sentiment_Analysis.png ├── GAN_Financial_Simulation.png ├── README.md ├── supervised_deep_learning_models │ ├── .DS_Store │ ├── 1. recurrent_neural_network_RNN_lstm.py │ └── 2. convolutional_neural_networks_(CNNs).py └── unsupervised_deep_learning_models │ ├── .DS_Store │ ├── 3. autoencoders.py │ └── 4. generative_adversarial_networks_(GANs).py ├── 4. Reinforcement Learning Models ├── Q_Learning_Stock_Trading_YFinance.png ├── README.md └── q_learning.py ├── 5. ML Applications In Finance ├── .DS_Store ├── 01. Risk Management ✅ │ ├── .DS_Store │ ├── 1. credit_scoring✅.py │ ├── 2. value-at-risk_modeling✅.py │ ├── 3. fraud_detection✅.py │ └── 4. operational_risk_modeling✅.py ├── 02. Asset Management ✅ │ ├── .DS_Store │ ├── 1. portfolio_optimization✅.py │ ├── 2. algorithmic_trading✅.py │ ├── 3. index_tracking✅.py │ ├── 4. pairs_trading✅.py │ └── requirements.txt ├── 03. Market Analysis And Prediction │ ├── .DS_Store │ ├── 1. price_forecasting.py │ ├── 2. sentiment_analysis.py │ ├── 3. option_pricing.py │ └── 4. order_flow_prediction.py ├── 04. Customer Service │ ├── .DS_Store │ ├── 1. chatbots.py │ ├── 2. personal_finance_management.py │ └── 3. customer_segmentation.py ├── 05. Compliance and Regulatory │ ├── .DS_Store │ ├── 1. anti_money_laundering_AML.py │ ├── 2. regulatory_reporting_automation.py │ └── 3. insider_trading_detection.py ├── 06. Real Estate │ ├── .DS_Store │ ├── 1. property_valuation.py │ └── 2. investment_analysis.py ├── 07. Supply Chain Finance │ └── supply_chain_finance.py ├── 08. Invoice Management │ └── invoice_management.py ├── 09. Cash Management │ └── cash_management.py ├── 10. Decentralized Finance (DEFI) │ ├── 1.yield_farming_optimizer.py │ └── 2.smart_contract_auditor.py ├── 11. Environmental Social And Governance Investing (ESG) │ ├── 1.sustainability_analytics.py │ └── 2.impact_measurement.py ├── 12. Behavioural Economics │ ├── 1.nudges.py │ └── 2.investor_sentiment_models.py ├── 13. Blockchain And Cryptocurrency │ └── cryptocurrency_price_prediction.py ├── 14. Explainable AI For Finance │ └── model_interpretability.py ├── 15. Robotic Process Automation (RPA) │ └── automated_invoice_processing.py ├── 16. Textual And Alternative Data For Finance │ ├── news_analytics.py │ └── reddit_sentiment_and_market_trends.py ├── 17. Fundamental Anaysis │ ├── automated_10kand10Q_parser.py │ └── financial_ratios_dashboard.py ├── 18. Satellite Image Analysis For Finance │ ├── agricultural_yield_prediction.py │ ├── disaster_impact_assessment.py │ ├── natural_resource_exploration.py │ ├── real_estate_development_monitoring.py │ ├── retail_traffic_analysis.py │ ├── shipping_activity.py │ └── tech_stack.txt ├── 19. 
Venture Capital │ ├── cap_table_simulation.py │ ├── investment_thesis_generator.py │ ├── portfolio_monitoring.py │ └── startup_scouting_dashboard.py ├── 20. Private Equity │ ├── buyout_model_automation.py │ ├── deal_sourcing.py │ ├── due_dilligence_automation.py │ ├── esg_integration.py │ └── post_acquisition_value_creation.py ├── 21. VC & PE General Tools │ ├── investor_matching.py │ ├── sector_trend_analysis.py │ ├── sentiment_analysis_for_foundersandexecutives.py │ └── valuation_multiples_benchmarking.py ├── 22. Investment Banking │ ├── .DS_Store │ ├── 1. mergers_and_acquisitions_M&A │ │ ├── .DS_Store │ │ ├── deal_comparator.py │ │ ├── manda_target_screening.py │ │ └── synergy_estimator.py │ ├── 2. valuation_and_financial_modeling │ │ ├── automated_dcf_model.py │ │ ├── capital_structure_optimizer.py │ │ ├── comparable_company_analysis.py │ │ └── wacc_calculator.py │ ├── 3. risk_management │ │ ├── credit_risk_assessment.py │ │ └── foreign_exchange_risk_management.py │ ├── 4. ipo_process │ │ ├── ipo_valuation_model.py │ │ └── roadshow_presentation_generator.py │ └── 5. client_and_market_analysis │ │ ├── client_relationship_management_CRM.py │ │ ├── market_sentiment_analysis.py │ │ └── pitchbook_automation.py ├── 23. trading │ ├── algorithmic_trading_bot.py │ ├── market_maker_simulator.py │ └── orderbook_visualizer.py ├── 24. Portfolio Management │ ├── portfolio_optimizer.py │ ├── risk_parity_portfolio.py │ └── tax_efficient_portfolio_rebalancer.py ├── 25. Asset Management │ ├── alpha_beta_analysis.py │ ├── etf_tracker.py │ └── performance_attribution_tool.py ├── 26. Wealth Management │ ├── estate_planning_tool.py │ ├── retirement_planner.py │ └── robo_adviser_prototype.py ├── 27. Multi Asset Risk Model │ └── multi_asset_risk_model.py └── 28. Personal Financial Management App │ └── personal_financial_management.py ├── Brewfile ├── README.md ├── main.py └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | # macOS system files 2 | .DS_Store 3 | 4 | # Python generated files and directories 5 | __pycache__/ 6 | *.pyc 7 | *.pyo 8 | *.pyd 9 | .Python 10 | build/ 11 | dist/ 12 | *.egg-info/ 13 | .eggs/ 14 | sdist/ 15 | develop-eggs/ 16 | .idea/ 17 | .vscode/ 18 | *.swp 19 | -------------------------------------------------------------------------------- /0. Data Prep/data_prep.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import yfinance as yf 3 | 4 | def fetch_data(ticker, start_date, end_date): 5 | return yf.download(ticker, start=start_date, end=end_date)['Close'].values 6 | 7 | def create_dataset(data, look_back=1): 8 | X, Y = [], [] 9 | for i in range(len(data) - look_back - 1): 10 | a = data[i:(i + look_back)] 11 | X.append(a) 12 | Y.append(data[i + look_back]) 13 | return np.array(X), np.array(Y) 14 | -------------------------------------------------------------------------------- /1. Supervised Learning Models/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manujajay/machinelearning4finance/7a315e218c211633e28f6ed3de3bdde1de5e09d3/1. Supervised Learning Models/.DS_Store -------------------------------------------------------------------------------- /1. Supervised Learning Models/1. 
linear_regression.py: -------------------------------------------------------------------------------- 1 | import yfinance as yf 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | from sklearn.linear_model import LinearRegression 5 | from sklearn.model_selection import train_test_split 6 | from sklearn.metrics import mean_squared_error, r2_score 7 | 8 | def fetch_data(ticker, start_date, end_date): 9 | return yf.download(ticker, start=start_date, end=end_date)['Close'].values 10 | 11 | def create_dataset(data, look_back=1): 12 | X, Y = [], [] 13 | for i in range(len(data) - look_back - 1): 14 | X.append(data[i:(i + look_back)]) 15 | Y.append(data[i + look_back]) 16 | return np.array(X), np.array(Y) 17 | 18 | if __name__ == "__main__": 19 | # Fetch and prepare data 20 | data = fetch_data('AAPL', '2010-01-01', '2023-01-01') 21 | X, Y = create_dataset(data) 22 | X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42) 23 | 24 | # Train model 25 | model = LinearRegression() 26 | model.fit(X_train, Y_train) 27 | pred = model.predict(X_test) 28 | 29 | # Evaluate model 30 | mse = mean_squared_error(Y_test, pred) 31 | r2 = r2_score(Y_test, pred) 32 | 33 | # Create a figure and a grid of subplots 34 | fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(16, 12)) 35 | 36 | # Plotting the actual vs predicted values on first subplot 37 | ax[0].scatter(range(len(Y_test)), Y_test, c='g', label='Actual') 38 | ax[0].scatter(range(len(pred)), pred, c='r', label='Predicted') 39 | ax[0].set_xlabel('Index in Test Set') 40 | ax[0].set_ylabel('Stock Price (USD)') 41 | ax[0].legend() 42 | ax[0].set_title('Linear Regression Model: Actual vs Predicted Stock Prices') 43 | 44 | # Annotations and equations on the second subplot 45 | ax[1].axis('off') 46 | ax[1].text(0.1, 0.8, f'Model: Linear Regression', fontsize=12) 47 | ax[1].text(0.1, 0.7, f'Equation: Y = {model.coef_[0]:.2f} * X + {model.intercept_:.2f}', fontsize=12) 48 | ax[1].text(0.1, 0.6, f'Mean Squared Error: {mse:.2f}', fontsize=12) 49 | ax[1].text(0.1, 0.5, f'R^2 Score: {r2:.2f}', fontsize=12) 50 | 51 | # Explanation 52 | explanation = ( 53 | "Explanation:\n" 54 | "Linear Regression tries to fit a linear equation to the data points.\n" 55 | "In this case, we are trying to predict the future stock price of Apple Inc.\n" 56 | "The model takes the stock price of a previous day (X) and predicts the stock price\n" 57 | "for the next day (Y) using the equation Y = Coefficient * X + Intercept.\n" 58 | "MSE and R^2 Score are metrics to evaluate the model's performance." 59 | ) 60 | ax[1].text(0.1, 0.1, explanation, fontsize=12) 61 | 62 | # Save plot as a PNG file 63 | plt.savefig('linear_regression_summary_with_explanation.png') 64 | 65 | # Show the plot 66 | plt.show() 67 | -------------------------------------------------------------------------------- /1. Supervised Learning Models/2. 
logistic_regression_model.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yfinance as yf 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | import seaborn as sns 6 | from sklearn.linear_model import LogisticRegression 7 | from sklearn.model_selection import train_test_split 8 | from sklearn.metrics import accuracy_score, confusion_matrix 9 | from fredapi import Fred 10 | 11 | def fetch_data(ticker, start_date, end_date): 12 | return yf.download(ticker, start=start_date, end=end_date)['Close'].values 13 | 14 | def fetch_fred_data(api_key, series_id, start_date, end_date): 15 | fred = Fred(api_key=api_key) 16 | return fred.get_series(series_id, start_date, end_date).values 17 | 18 | def create_dataset(stock_data, sp500_data, interest_rates, gdp_growth, unemployment, look_back=1): 19 | X, Y = [], [] 20 | for i in range(len(stock_data) - look_back - 1): 21 | features = list(stock_data[i:(i + look_back)]) + [sp500_data[i], interest_rates[i], gdp_growth[i], unemployment[i]] 22 | X.append(features) 23 | Y.append(1 if stock_data[i + look_back] > stock_data[i + look_back - 1] else 0) 24 | return np.array(X), np.array(Y) 25 | 26 | if __name__ == "__main__": 27 | api_key = os.getenv('FRED_API_KEY') 28 | if api_key is None: 29 | print("Please set your FRED_API_KEY as an environment variable.") 30 | exit() 31 | 32 | # Fetch and prepare data 33 | stock_data = fetch_data('AAPL', '2010-01-01', '2023-01-01') 34 | sp500_data = fetch_data('^GSPC', '2010-01-01', '2023-01-01') 35 | interest_rates = fetch_fred_data(api_key, 'TB3MS', '2010-01-01', '2023-01-01') 36 | gdp_growth = fetch_fred_data(api_key, 'A191RL1Q225SBEA', '2010-01-01', '2023-01-01') 37 | unemployment = fetch_fred_data(api_key, 'UNRATE', '2010-01-01', '2023-01-01') 38 | 39 | # Truncate data to the smallest length among all series 40 | min_len = min(len(stock_data), len(sp500_data), len(interest_rates), len(gdp_growth), len(unemployment)) 41 | stock_data, sp500_data, interest_rates, gdp_growth, unemployment = [arr[:min_len] for arr in [stock_data, sp500_data, interest_rates, gdp_growth, unemployment]] 42 | 43 | X, Y = create_dataset(stock_data, sp500_data, interest_rates, gdp_growth, unemployment) 44 | X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42) 45 | 46 | # Train model 47 | model = LogisticRegression() 48 | model.fit(X_train, Y_train) 49 | pred = model.predict(X_test) 50 | 51 | # Evaluate model 52 | accuracy = accuracy_score(Y_test, pred) 53 | cm = confusion_matrix(Y_test, pred) 54 | 55 | # Create figure 56 | fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6)) 57 | 58 | # Plot confusion matrix using Seaborn 59 | sns.heatmap(cm, annot=True, fmt='g', ax=ax1, cmap='Blues') 60 | ax1.set_xlabel('Predicted labels') 61 | ax1.set_ylabel('True labels') 62 | ax1.set_title('Confusion Matrix') 63 | 64 | # Explanation 65 | explanation = ( 66 | f"Model: Logistic Regression\n" 67 | f"Accuracy: {accuracy:.2f}\n\n" 68 | "Predictors:\n" 69 | "- Previous day's stock price\n" 70 | "- S&P 500 index\n" 71 | "- Interest rates\n" 72 | "- GDP growth rates\n" 73 | "- Unemployment rates\n\n" 74 | "Explanation:\n" 75 | "Logistic Regression is a classification algorithm.\n" 76 | "It uses multiple predictors to estimate the probability of the stock price going up (1) or down (0) the next day.\n" 77 | "Accuracy is the metric used to evaluate the model's performance.\n" 78 | "The confusion matrix provides a summary of the number of correct and incorrect 
predictions." 79 | ) 80 | 81 | ax2.axis('off') 82 | ax2.text(0.1, 0.1, explanation, fontsize=12) 83 | 84 | plt.tight_layout() 85 | plt.savefig('logistic_regression_summary_with_explanation.png') 86 | plt.show() 87 | 88 | -------------------------------------------------------------------------------- /1. Supervised Learning Models/3. naive_bayes_model.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yfinance as yf 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | import seaborn as sns 6 | from sklearn.naive_bayes import GaussianNB 7 | from sklearn.model_selection import train_test_split 8 | from sklearn.metrics import accuracy_score, confusion_matrix 9 | from fredapi import Fred 10 | 11 | def fetch_data(ticker, start_date, end_date): 12 | """Fetch stock or index data using yfinance.""" 13 | return yf.download(ticker, start=start_date, end=end_date)['Close'].values 14 | 15 | def fetch_fred_data(api_key, series_id, start_date, end_date): 16 | """Fetch macroeconomic data using FRED API.""" 17 | fred = Fred(api_key=api_key) 18 | return fred.get_series(series_id, start_date, end_date).values 19 | 20 | def create_dataset(stock_data, sp500_data, interest_rates, gdp_growth, unemployment, look_back=1): 21 | """Create dataset combining stock data and macroeconomic indicators.""" 22 | X, Y = [], [] 23 | for i in range(len(stock_data) - look_back - 1): 24 | features = list(stock_data[i:(i + look_back)]) + [sp500_data[i], interest_rates[i], gdp_growth[i], unemployment[i]] 25 | X.append(features) 26 | Y.append(1 if stock_data[i + look_back] > stock_data[i + look_back - 1] else 0) 27 | return np.array(X), np.array(Y) 28 | 29 | if __name__ == "__main__": 30 | # Get FRED API Key from environment variable 31 | api_key = os.getenv('FRED_API_KEY') 32 | if api_key is None: 33 | print("Please set your FRED_API_KEY as an environment variable.") 34 | exit() 35 | 36 | # Fetch and prepare various data 37 | stock_data = fetch_data('AAPL', '2010-01-01', '2023-01-01') 38 | sp500_data = fetch_data('^GSPC', '2010-01-01', '2023-01-01') 39 | interest_rates = fetch_fred_data(api_key, 'TB3MS', '2010-01-01', '2023-01-01') 40 | gdp_growth = fetch_fred_data(api_key, 'A191RL1Q225SBEA', '2010-01-01', '2023-01-01') 41 | unemployment = fetch_fred_data(api_key, 'UNRATE', '2010-01-01', '2023-01-01') 42 | 43 | # Make sure all data series are of the same length 44 | min_len = min(len(stock_data), len(sp500_data), len(interest_rates), len(gdp_growth), len(unemployment)) 45 | stock_data, sp500_data, interest_rates, gdp_growth, unemployment = stock_data[:min_len], sp500_data[:min_len], interest_rates[:min_len], gdp_growth[:min_len], unemployment[:min_len] 46 | 47 | # Create dataset 48 | X, Y = create_dataset(stock_data, sp500_data, interest_rates, gdp_growth, unemployment) 49 | 50 | # Split dataset 51 | X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42) 52 | 53 | # Train model 54 | model = GaussianNB() 55 | model.fit(X_train, Y_train) 56 | pred = model.predict(X_test) 57 | 58 | # Evaluate model 59 | accuracy = accuracy_score(Y_test, pred) 60 | cm = confusion_matrix(Y_test, pred) 61 | 62 | # Create figure for visualization 63 | fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6)) 64 | 65 | # Plot confusion matrix 66 | sns.heatmap(cm, annot=True, fmt='g', ax=ax1, cmap='Blues') 67 | ax1.set_xlabel('Predicted labels') 68 | ax1.set_ylabel('True labels') 69 | ax1.set_title('Confusion Matrix') 70 | 71 | # Add explanation text 72 | 
explanation = ( 73 | f"Model: Gaussian Naive Bayes\n" 74 | f"Accuracy: {accuracy:.2f}\n\n" 75 | "Predictors:\n" 76 | "- Previous day's stock price\n" 77 | "- S&P 500 index\n" 78 | "- Interest rates\n" 79 | "- GDP growth rates\n" 80 | "- Unemployment rates\n\n" 81 | "Explanation:\n" 82 | "Naive Bayes is a probabilistic classification algorithm.\n" 83 | "In this context, it predicts whether the stock price will go up (1) or down (0) the next day based on Bayes' theorem.\n" 84 | "Accuracy is the metric used to evaluate the model's performance.\n" 85 | "The confusion matrix provides a summary of the number of correct and incorrect predictions." 86 | ) 87 | ax2.axis('off') 88 | ax2.text(0.1, 0.1, explanation, fontsize=12) 89 | 90 | # Save and show plot 91 | plt.tight_layout() 92 | plt.savefig('naive_bayes_summary_with_explanation.png') 93 | plt.show() 94 | 95 | -------------------------------------------------------------------------------- /1. Supervised Learning Models/4. random_forest_model.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yfinance as yf 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | import seaborn as sns 6 | from sklearn.ensemble import RandomForestClassifier 7 | from sklearn.model_selection import train_test_split 8 | from sklearn.metrics import accuracy_score, confusion_matrix 9 | from fredapi import Fred 10 | from textwrap import wrap 11 | 12 | # Fetch stock data 13 | def fetch_data(ticker, start_date, end_date): 14 | return yf.download(ticker, start=start_date, end=end_date)['Close'].values 15 | 16 | # Fetch economic indicators using the FRED API 17 | def fetch_fred_data(api_key, series_id, start_date, end_date): 18 | fred = Fred(api_key=api_key) 19 | return fred.get_series(series_id, start_date, end_date).values 20 | 21 | # Create dataset with predictors and target 22 | def create_dataset(stock_data, sp500_data, interest_rates, gdp_growth, unemployment, look_back=1): 23 | X, Y = [], [] 24 | for i in range(len(stock_data) - look_back - 1): 25 | features = list(stock_data[i:(i + look_back)]) + [sp500_data[i], interest_rates[i], gdp_growth[i], unemployment[i]] 26 | X.append(features) 27 | Y.append(1 if stock_data[i + look_back] > stock_data[i + look_back - 1] else 0) 28 | return np.array(X), np.array(Y) 29 | 30 | if __name__ == "__main__": 31 | # Get FRED API key from environment variable 32 | api_key = os.getenv('FRED_API_KEY') 33 | if api_key is None: 34 | print("Please set your FRED_API_KEY as an environment variable.") 35 | exit() 36 | 37 | # Fetch data 38 | stock_data = fetch_data('AAPL', '2010-01-01', '2023-01-01') 39 | sp500_data = fetch_data('^GSPC', '2010-01-01', '2023-01-01') 40 | interest_rates = fetch_fred_data(api_key, 'TB3MS', '2010-01-01', '2023-01-01') 41 | gdp_growth = fetch_fred_data(api_key, 'A191RL1Q225SBEA', '2010-01-01', '2023-01-01') 42 | unemployment = fetch_fred_data(api_key, 'UNRATE', '2010-01-01', '2023-01-01') 43 | 44 | # After fetching the data, ensure they all have the same length 45 | min_len = min(len(stock_data), len(sp500_data), len(interest_rates), len(gdp_growth), len(unemployment)) 46 | 47 | stock_data = stock_data[:min_len] 48 | sp500_data = sp500_data[:min_len] 49 | interest_rates = interest_rates[:min_len] 50 | gdp_growth = gdp_growth[:min_len] 51 | unemployment = unemployment[:min_len] 52 | 53 | 54 | # Create dataset 55 | X, Y = create_dataset(stock_data, sp500_data, interest_rates, gdp_growth, unemployment) 56 | X_train, X_test, Y_train, Y_test = 
train_test_split(X, Y, test_size=0.2, random_state=42) 57 | 58 | # Train Random Forest model 59 | model = RandomForestClassifier(n_estimators=100) 60 | model.fit(X_train, Y_train) 61 | pred = model.predict(X_test) 62 | 63 | # Evaluate the model 64 | accuracy = accuracy_score(Y_test, pred) 65 | cm = confusion_matrix(Y_test, pred) 66 | 67 | # Create visualization 68 | fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6)) 69 | 70 | # Plot confusion matrix 71 | sns.heatmap(cm, annot=True, fmt='g', ax=ax1, cmap='Blues') 72 | ax1.set_xlabel('Predicted labels') 73 | ax1.set_ylabel('True labels') 74 | ax1.set_title('Confusion Matrix') 75 | 76 | # Explanation text 77 | explanation = ( 78 | f"Model: Random Forest\n" 79 | f"Accuracy: {accuracy:.2f}\n\n" 80 | "Predictors:\n" 81 | "- Previous day's stock price\n" 82 | "- S&P 500 index\n" 83 | "- Interest rates\n" 84 | "- GDP growth rates\n" 85 | "- Unemployment rates\n\n" 86 | "Explanation:\n" 87 | "The Random Forest model is an ensemble learning method primarily used for classification and regression tasks. " 88 | "It employs multiple decision trees during training and outputs the mode of classes (classification) or mean prediction (regression) of the individual trees for a more robust and accurate prediction.\n" 89 | "1. Bagging: Random Forest uses 'Bootstrap Aggregating' or Bagging, where random subsets of the training data are chosen with replacement to train each decision tree. " 90 | "This diversity ensures that each decision tree is different and prevents overfitting.\n" 91 | "2. Decision Trees: Each subset of data constructs a decision tree. Unlike a single decision tree that uses all features to make a decision at each node, " 92 | "Random Forest selects a random subset of features for every node split. This randomness contributes to 'decorrelating' the trees, thereby boosting the model's performance.\n" 93 | "3. Features: In our case, features like the previous day's stock price and S&P 500 index could be dominant factors in market movements. " 94 | "Interest rates influence investment sentiment, GDP growth rates show economic health, and unemployment rates can reflect consumer spending, all affecting the stock price. " 95 | "Random Forest takes all these features into account for each tree.\n" 96 | "4. Prediction: Once all trees are built, the model makes a prediction for a new data point by letting each tree in the ensemble 'vote' for a class. " 97 | "In your binary classification task (stock price going up as '1' or down as '0'), the majority vote will be the final output of the model.\n" 98 | "5. Majority Voting: The Random Forest uses 'majority voting' to finalize the prediction. " 99 | "The class that receives the most votes from all the trees in the forest becomes the model's prediction.\n" 100 | "6. Equation for Classification: The final prediction, \(y\), is determined as \(y = \\mathrm{mode}(y_1, y_2, \\ldots, y_N)\), where \(N\) is the number of trees in the forest.\n" 101 | "By aggregating the insights and 'votes' from multiple decision trees, Random Forest provides a more balanced and nuanced understanding of the complex relationships among the predictors." 
102 | ) 103 | 104 | # Wrap the text to make it fit into the figure neatly 105 | wrapped_text = "\n".join(wrap(explanation, width=80)) # 80 characters per line, adjust as needed 106 | 107 | ax2.axis('off') 108 | ax2.text(0.01, 0.99, wrapped_text, fontsize=9, va='top') # Aligns text at top left corner with a fontsize of 9 109 | 110 | plt.tight_layout() 111 | plt.savefig('random_forest_summary_with_explanation.png') 112 | plt.show() -------------------------------------------------------------------------------- /1. Supervised Learning Models/README.md: -------------------------------------------------------------------------------- 1 | ![alt text](linear_regression_summary_with_explanation.png) 2 | ![alt text](logistic_regression_summary_with_explanation.png) 3 | ![alt text](naive_bayes_summary_with_explanation.png) 4 | ![alt text](random_forest_summary_with_explanation.png) -------------------------------------------------------------------------------- /1. Supervised Learning Models/linear_regression_summary_with_explanation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manujajay/machinelearning4finance/7a315e218c211633e28f6ed3de3bdde1de5e09d3/1. Supervised Learning Models/linear_regression_summary_with_explanation.png -------------------------------------------------------------------------------- /1. Supervised Learning Models/logistic_regression_summary_with_explanation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manujajay/machinelearning4finance/7a315e218c211633e28f6ed3de3bdde1de5e09d3/1. Supervised Learning Models/logistic_regression_summary_with_explanation.png -------------------------------------------------------------------------------- /1. Supervised Learning Models/naive_bayes_summary_with_explanation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manujajay/machinelearning4finance/7a315e218c211633e28f6ed3de3bdde1de5e09d3/1. Supervised Learning Models/naive_bayes_summary_with_explanation.png -------------------------------------------------------------------------------- /1. Supervised Learning Models/random_forest_summary_with_explanation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manujajay/machinelearning4finance/7a315e218c211633e28f6ed3de3bdde1de5e09d3/1. Supervised Learning Models/random_forest_summary_with_explanation.png -------------------------------------------------------------------------------- /2. Unsupervised Learning Models/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manujajay/machinelearning4finance/7a315e218c211633e28f6ed3de3bdde1de5e09d3/2. Unsupervised Learning Models/.DS_Store -------------------------------------------------------------------------------- /2. Unsupervised Learning Models/1. 
clustering.py: -------------------------------------------------------------------------------- 1 | # k-means clustering 2 | import yfinance as yf 3 | import pandas_datareader.data as web 4 | import pandas as pd 5 | from sklearn.cluster import KMeans 6 | import matplotlib.pyplot as plt 7 | from textwrap import wrap 8 | 9 | # Download stock data from Yahoo Finance 10 | tickers = ['AAPL', 'GOOGL', 'MSFT', 'AMZN', 'TSLA'] 11 | stock_data = yf.download(tickers, start='2020-01-01', end='2021-01-01')['Adj Close'] 12 | 13 | # Download S&P 500 data from FRED 14 | sp500 = web.DataReader('SP500', 'fred', '2020-01-01', '2021-01-01') 15 | 16 | # Combine stock and S&P 500 data 17 | data = pd.concat([stock_data, sp500], axis=1).dropna() 18 | data = data.pct_change().dropna() # Calculate daily returns 19 | 20 | # K-Means clustering 21 | kmeans = KMeans(n_clusters=3) 22 | kmeans.fit(data) 23 | labels = kmeans.labels_ 24 | 25 | # Create plot with subplots 26 | fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6)) 27 | 28 | # Main plot on ax1 (select columns by name: yfinance orders ticker columns alphabetically, so positional indexing would put AMZN, not GOOGL, on the y-axis) 29 | ax1.scatter(data['AAPL'], data['GOOGL'], c=labels, cmap='viridis') 30 | ax1.set_title('K-Means Clustering of Stock Data and S&P 500') 31 | ax1.set_xlabel('AAPL Daily Returns') 32 | ax1.set_ylabel('GOOGL Daily Returns') 33 | 34 | # Explanation on ax2 35 | explanation = ( 36 | "Algorithm: K-Means Clustering\n" 37 | "Number of Clusters: 3\n" 38 | "Data: Stock prices and S&P 500 index\n\n" 39 | "Explanation:\n" 40 | "K-means partitions the financial data into 'K' clusters based on daily returns. " 41 | "While it's not generally used for prediction, it provides valuable insights into data structure. " 42 | "These insights can be instrumental for:\n\n" 43 | "- Portfolio Diversification: Identifying statistically similar assets for diversification.\n" 44 | "- Risk Management: Recognizing asset groups for better hedging strategies.\n" 45 | "- Market Regime Identification: Understanding different market states for dynamic trading." 46 | ) 47 | wrapped_explanation = "\n".join(wrap(explanation, 50)) # Wraps the text at 50 characters 48 | 49 | ax2.axis('off') 50 | ax2.text(0.01, 0.99, wrapped_explanation, fontsize=10, va='top', wrap=True) # Aligns text at top left corner with a fontsize of 10 51 | 52 | # Save and show plot 53 | plt.tight_layout() 54 | plt.savefig('kmeans_financial_data_with_explanation.png') 55 | plt.show() 56 | -------------------------------------------------------------------------------- /2. Unsupervised Learning Models/2. 
dimensionality_reduction.py: -------------------------------------------------------------------------------- 1 | # Import necessary libraries 2 | import yfinance as yf 3 | import pandas_datareader.data as web 4 | import pandas as pd 5 | import numpy as np 6 | from sklearn.decomposition import PCA 7 | import matplotlib.pyplot as plt 8 | from textwrap import wrap 9 | 10 | # Download stock data from Yahoo Finance 11 | tickers = ['AAPL', 'GOOGL', 'MSFT', 'AMZN', 'TSLA'] 12 | stock_data = yf.download(tickers, start='2020-01-01', end='2021-01-01')['Adj Close'] 13 | 14 | # Download S&P 500 data from FRED 15 | sp500 = web.DataReader('SP500', 'fred', '2020-01-01', '2021-01-01') 16 | 17 | # Combine stock and S&P 500 data 18 | data = pd.concat([stock_data, sp500], axis=1).dropna() 19 | data = data.pct_change().dropna() # Calculate daily returns 20 | 21 | # Apply PCA 22 | pca = PCA(n_components=2) 23 | principal_components = pca.fit_transform(data) 24 | 25 | # Analyze the components 26 | components_df = pd.DataFrame(pca.components_, columns=data.columns, index=[f'PC{i+1}' for i in range(2)]) 27 | 28 | # Create plot with subplots 29 | fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6)) 30 | 31 | # Main plot on ax1 32 | ax1.scatter(principal_components[:, 0], principal_components[:, 1], c='blue') 33 | ax1.set_title('PCA of Stock Data and S&P 500') 34 | ax1.set_xlabel('Principal Component 1') 35 | ax1.set_ylabel('Principal Component 2') 36 | 37 | # Turn off axis for ax2 38 | ax2.axis('off') 39 | 40 | # Initial explanation 41 | initial_explanation = ( 42 | "Algorithm: Principal Component Analysis (PCA)\n" 43 | "Components: 2\n" 44 | "Data: Stock prices and S&P 500 index\n\n" 45 | "PCA reduces the dimensionality of the data by finding new variables (Principal Components) that maximize variance.\n" 46 | "This is useful for:\n" 47 | "- Data Visualization: Reducing dimensions aids in visualizing complex data.\n" 48 | "- Risk Modeling: Identifying primary risk factors in a portfolio.\n" 49 | "- Factor Analysis: Understanding the underlying factors affecting asset prices.\n" 50 | ) 51 | 52 | # New section explaining the drivers of the components (use absolute loadings: a large negative loading is just as influential as a large positive one) 53 | new_section = ( 54 | f"\nPrincipal Component 1 is most influenced by {components_df.loc['PC1'].abs().idxmax()}.\n" 55 | f"Principal Component 2 is most influenced by {components_df.loc['PC2'].abs().idxmax()}.\n" 56 | ) 57 | 58 | # Combine initial explanation and new section 59 | full_explanation = initial_explanation + new_section 60 | 61 | # Additional useful explanation 62 | why_useful = ( 63 | "\nUsefulness:\n" 64 | "1. Portfolio Optimization: Identify key drivers of asset returns.\n" 65 | "2. Risk Management: Uncover main risk factors.\n" 66 | "3. Trading Strategies: Develop strategies based on hidden factors.\n" 67 | "4. Data Visualization: Easier interpretation of high-dimensional data.\n" 68 | "5. Correlation Structure: Simplify data complexity.\n" 69 | "6. Market Regime Identification: Adapt trading strategies dynamically."
70 | ) 71 | 72 | # Combine the original explanation, the 'why useful' section, and the new section 73 | full_explanation += why_useful 74 | 75 | wrapped_full_explanation = "\n".join(wrap(full_explanation, 50)) 76 | ax2.text(0.01, 0.99, wrapped_full_explanation, fontsize=10, va='top', wrap=True) 77 | 78 | # Save and show plot 79 | plt.tight_layout() 80 | plt.savefig('PCA_financial_data_with_full_explanation.png') 81 | plt.show() 82 | 83 | # Output the components for further analysis 84 | print("Principal Component Analysis") 85 | print(components_df) 86 | -------------------------------------------------------------------------------- /2. Unsupervised Learning Models/PCA_financial_data_with_full_explanation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manujajay/machinelearning4finance/7a315e218c211633e28f6ed3de3bdde1de5e09d3/2. Unsupervised Learning Models/PCA_financial_data_with_full_explanation.png -------------------------------------------------------------------------------- /2. Unsupervised Learning Models/README.md: -------------------------------------------------------------------------------- 1 | ![alt text](kmeans_financial_data_with_explanation.png) 2 | ![alt text](PCA_financial_data_with_full_explanation.png) 3 | -------------------------------------------------------------------------------- /2. Unsupervised Learning Models/kmeans_financial_data_with_explanation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manujajay/machinelearning4finance/7a315e218c211633e28f6ed3de3bdde1de5e09d3/2. Unsupervised Learning Models/kmeans_financial_data_with_explanation.png -------------------------------------------------------------------------------- /3. Deep Learning Models/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manujajay/machinelearning4finance/7a315e218c211633e28f6ed3de3bdde1de5e09d3/3. Deep Learning Models/.DS_Store -------------------------------------------------------------------------------- /3. Deep Learning Models/Anomaly_Detection_Using_Autoencoder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manujajay/machinelearning4finance/7a315e218c211633e28f6ed3de3bdde1de5e09d3/3. Deep Learning Models/Anomaly_Detection_Using_Autoencoder.png -------------------------------------------------------------------------------- /3. Deep Learning Models/Apple_Stock_Price_Prediction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manujajay/machinelearning4finance/7a315e218c211633e28f6ed3de3bdde1de5e09d3/3. Deep Learning Models/Apple_Stock_Price_Prediction.png -------------------------------------------------------------------------------- /3. Deep Learning Models/Financial_News_Sentiment_Analysis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manujajay/machinelearning4finance/7a315e218c211633e28f6ed3de3bdde1de5e09d3/3. Deep Learning Models/Financial_News_Sentiment_Analysis.png -------------------------------------------------------------------------------- /3. 
Deep Learning Models/GAN_Financial_Simulation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manujajay/machinelearning4finance/7a315e218c211633e28f6ed3de3bdde1de5e09d3/3. Deep Learning Models/GAN_Financial_Simulation.png -------------------------------------------------------------------------------- /3. Deep Learning Models/README.md: -------------------------------------------------------------------------------- 1 | ![alt text](Apple_Stock_Price_Prediction.png) 2 | ![alt text](Financial_News_Sentiment_Analysis.png) 3 | ![alt text](Anomaly_Detection_Using_Autoencoder.png) 4 | ![alt text](GAN_Financial_Simulation.png) 5 | 6 | -------------------------------------------------------------------------------- /3. Deep Learning Models/supervised_deep_learning_models/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manujajay/machinelearning4finance/7a315e218c211633e28f6ed3de3bdde1de5e09d3/3. Deep Learning Models/supervised_deep_learning_models/.DS_Store -------------------------------------------------------------------------------- /3. Deep Learning Models/supervised_deep_learning_models/1. recurrent_neural_network_RNN_lstm.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Long Short-Term Memory networks (LSTMs) are a type of Recurrent Neural Network (RNN) and are typically used in the context 3 | of supervised learning, particularly for sequence prediction problems like time series forecasting, natural language processing, 4 | and more. In these applications, you usually have labeled data where the sequence input is associated with a corresponding output. 5 | 6 | That being said, LSTMs can also be used in unsupervised learning scenarios. For example, you can use LSTMs in autoencoders for 7 | sequence-to-sequence reconstruction, anomaly detection in time series data, or learning embeddings for sequences without explicit labels. 8 | ''' 9 | 10 | ''' 11 | What is an LSTM? 12 | Long Short-Term Memory (LSTM) is a type of recurrent neural network (RNN) architecture. An LSTM is designed to remember past information in sequence data and is widely used in time series analysis, natural language processing, and many other sequence-related tasks. Unlike standard feedforward neural networks, LSTMs have "memory" in the form of a cell state and hidden state, which helps them learn from the "context" or "sequence" of the inputs. 13 | 14 | How it works? 15 | Input Sequence: At each time step, the LSTM takes in an input and the previous cell state and hidden state 16 | 17 | Forget Gate: Decide what information from the cell state should be thrown away. 18 | 19 | Input Gate: Update the cell state with new information. 20 | 21 | Output Gate: Based on the cell state and the input, decide what should be the new hidden state 22 | 23 | New Cell State: Finally, calculate the new cell state 24 | 25 | Predictive Power 26 | LSTMs are particularly useful for solving problems that require learning long-term dependencies. They are less susceptible to the vanishing gradient problem, which allows them to learn from data where the important features are separated by many time steps. This makes them highly efficient for various sequence-based tasks such as time-series prediction, sequence-to-sequence mapping, and so on. 
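For reference, the gate updates sketched above can be written in standard notation (this is the generic LSTM formulation, not something specific to this script): forget gate f_t = sigmoid(W_f · [h_{t-1}, x_t] + b_f); input gate i_t = sigmoid(W_i · [h_{t-1}, x_t] + b_i); candidate state C~_t = tanh(W_C · [h_{t-1}, x_t] + b_C); new cell state C_t = f_t * C_{t-1} + i_t * C~_t; output gate o_t = sigmoid(W_o · [h_{t-1}, x_t] + b_o); and new hidden state h_t = o_t * tanh(C_t).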
27 | 28 | In finance, LSTMs can be used for predicting stock prices, forex trading, and even for algorithmic trading strategies. However, it's crucial to note that the financial markets are influenced by a multitude of factors, many of which can be non-sequential or not included in the model. So while LSTMs can capture patterns in past data efficiently, they are by no means a guarantee for high accuracy in financial predictions. 29 | 30 | By setting up a proper evaluation metric (like RMSE for regression tasks, or F1-score for classification tasks), you can get a quantitative measure of how well your LSTM model is likely to perform on unseen data. 31 | ''' 32 | import numpy as np 33 | import pandas as pd 34 | import yfinance as yf 35 | from sklearn.preprocessing import MinMaxScaler 36 | from tensorflow.keras.models import Sequential 37 | from tensorflow.keras.layers import LSTM, Dense 38 | from sklearn.metrics import mean_squared_error, mean_absolute_error 39 | from math import sqrt 40 | import matplotlib.pyplot as plt 41 | 42 | # Download the Apple stock price data 43 | data = yf.download('AAPL', start='2019-01-01', end='2021-01-01') 44 | data = data[['Close']] 45 | 46 | # Data Preprocessing 47 | scaler = MinMaxScaler(feature_range=(0, 1)) 48 | scaled_data = scaler.fit_transform(data.values) 49 | 50 | # Create a dataset for training the LSTM model 51 | train_data = scaled_data[:int(0.8 * len(scaled_data))] 52 | x_train, y_train = [], [] 53 | for i in range(60, len(train_data)): 54 | x_train.append(train_data[i-60:i, 0]) 55 | y_train.append(train_data[i, 0]) 56 | 57 | x_train, y_train = np.array(x_train), np.array(y_train) 58 | x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1)) 59 | 60 | # Debugging 61 | print(f"Total data length: {len(data)}") 62 | print(f"Training data length: {len(train_data)}") 63 | 64 | # Building and Training the LSTM Model 65 | model = Sequential() 66 | model.add(LSTM(units=50, return_sequences=True, input_shape=(x_train.shape[1], 1))) 67 | model.add(LSTM(units=50, return_sequences=False)) 68 | model.add(Dense(units=25)) 69 | model.add(Dense(units=1)) 70 | 71 | # Compile and train the model 72 | model.compile(optimizer='adam', loss='mean_squared_error') 73 | history = model.fit(x_train, y_train, batch_size=1, epochs=1) 74 | 75 | # Output the training loss 76 | print(f"Training loss: {history.history['loss'][0]}") 77 | 78 | # Testing the Model 79 | test_data = scaled_data[int(0.8 * len(scaled_data)) - 60:] 80 | x_test, y_test = [], [] 81 | for i in range(60, len(test_data)): 82 | x_test.append(test_data[i-60:i, 0]) 83 | y_test.append(test_data[i, 0]) 84 | 85 | x_test, y_test = np.array(x_test), np.array(y_test) 86 | x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1)) 87 | 88 | # Make predictions using the test set 89 | predicted_price = model.predict(x_test) 90 | predicted_price = scaler.inverse_transform(np.reshape(predicted_price, (-1, 1))) 91 | 92 | # Calculate Test Loss 93 | test_loss = model.evaluate(x_test, y_test) 94 | print(f"Test loss: {test_loss}") 95 | 96 | # Calculate Root Mean Square Error (RMSE) on the original price scale (y_test is still min-max scaled, so invert it first) 97 | rmse = sqrt(mean_squared_error(scaler.inverse_transform(y_test.reshape(-1, 1)), predicted_price)) 98 | print(f'Root Mean Square Error (RMSE): {rmse}') 99 | 100 | # Calculate Mean Absolute Error (MAE), also on the original price scale 101 | mae = mean_absolute_error(scaler.inverse_transform(y_test.reshape(-1, 1)), predicted_price) 102 | print(f'Mean Absolute Error (MAE): {mae}') 103 | 104 | # Visualizing the Results 105 | plt.figure(figsize=(16, 8)) 106 | 107 | # Plot the real stock price 108 | plt.plot(data.index, data['Close'], 
label='True Price') 109 | 110 | # Generate the index for the predicted price (the test predictions cover the tail of the series, so align them with its last dates) 111 | predicted_index = data.index[-len(predicted_price):] 112 | 113 | # Debugging: Verifying dimensions before plotting 114 | print(f"Shape of predicted_price: {predicted_price.shape}") 115 | print(f"Shape of predicted_index: {len(predicted_index)}") 116 | print(f"First few elements of predicted_index: {predicted_index[:5]}") 117 | print(f"Last few elements of predicted_index: {predicted_index[-5:]}") 118 | print(f"Length of data.index: {len(data.index)}") 119 | print(f"Length of train_data: {len(train_data)}") 120 | print(f"Length of train_data + 60: {len(train_data) + 60}") 121 | 122 | 123 | # Adjusting predicted_price to match the length of predicted_index (a no-op with the alignment above, kept as a guard) 124 | predicted_price = predicted_price[:len(predicted_index)] 125 | 126 | # Plot the predicted stock price 127 | if len(predicted_index) == predicted_price.shape[0]: 128 | plt.plot(predicted_index, predicted_price.flatten(), label='Predicted Price') 129 | else: 130 | print("Shape mismatch: Skipping plotting of predicted prices") 131 | 132 | # Add performance metrics and explanations to the plot (computed from this run rather than hardcoded) 133 | metrics_text = f'''Test Loss: {test_loss:.4f} (Lower is better, computed on scaled data) 134 | RMSE: {rmse:.4f} (Lower is better, in USD since errors are computed on inverse-transformed prices) 135 | MAE: {mae:.4f} (Lower is better, in USD)''' 136 | 137 | plt.text(0.02, 0.5, metrics_text, transform=plt.gca().transAxes, fontsize=12, verticalalignment='center', bbox={'facecolor': 'white', 'alpha': 0.7, 'pad': 10}) 138 | 139 | plt.legend() 140 | plt.title("Apple Stock Price Prediction using LSTM") 141 | plt.xlabel("Date") 142 | plt.ylabel("Stock Price") 143 | 144 | # Save the plot as a .png file 145 | plt.savefig('Apple_Stock_Price_Prediction.png') 146 | 147 | # Show the plot 148 | plt.show() 149 | -------------------------------------------------------------------------------- /3. Deep Learning Models/supervised_deep_learning_models/2. 
convolutional_neural_networks_(CNNs).py: -------------------------------------------------------------------------------- 1 | # CNNs are often supervised models (labeled data used for training), primarily used for image classification 2 | 3 | from sklearn.model_selection import train_test_split 4 | from tensorflow.keras.models import Sequential 5 | from tensorflow.keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | 9 | # Simulated data (PLEASE replace with real scraped data for any serious application) 10 | headlines = ["Stocks are up today", "Markets crash due to economic instability", "Neutral day in the market"] * 10 # Replicating for more data 11 | labels = [1, 0, 2] * 10 # 1: positive, 0: negative, 2: neutral 12 | 13 | # Text Preprocessing 14 | from tensorflow.keras.preprocessing.text import Tokenizer 15 | from tensorflow.keras.preprocessing.sequence import pad_sequences 16 | 17 | tokenizer = Tokenizer() 18 | tokenizer.fit_on_texts(headlines) 19 | vocab_size = len(tokenizer.word_index) + 1 20 | max_length = max([len(s.split()) for s in headlines]) 21 | 22 | sequences = tokenizer.texts_to_sequences(headlines) 23 | padded_sequences = pad_sequences(sequences, maxlen=max_length, padding='post') 24 | 25 | # Data Splitting 26 | X_train, X_test, y_train, y_test = train_test_split(padded_sequences, np.array(labels), test_size=0.2, random_state=42) 27 | 28 | # Model Building 29 | model = Sequential() 30 | model.add(Embedding(vocab_size, 16, input_length=max_length)) 31 | model.add(Conv1D(16, 3, activation='relu')) 32 | model.add(GlobalMaxPooling1D()) 33 | model.add(Dense(3, activation='softmax')) 34 | 35 | model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy']) 36 | history = model.fit(X_train, y_train, epochs=10, batch_size=2, validation_split=0.2) 37 | 38 | # Testing 39 | loss, accuracy = model.evaluate(X_test, y_test) 40 | print(f"Test Loss: {loss}") 41 | print(f"Test Accuracy: {accuracy}") 42 | 43 | # Visualization 44 | plt.figure(figsize=(16, 8)) 45 | 46 | plt.subplot(1, 2, 1) 47 | plt.plot(history.history['accuracy'], label='Train Accuracy') 48 | plt.plot(history.history['val_accuracy'], label='Validation Accuracy') 49 | plt.title('Model Accuracy') 50 | plt.xlabel('Epochs') 51 | plt.ylabel('Accuracy') 52 | plt.legend() 53 | 54 | plt.subplot(1, 2, 2) 55 | plt.plot(history.history['loss'], label='Train Loss') 56 | plt.plot(history.history['val_loss'], label='Validation Loss') 57 | plt.title('Model Loss') 58 | plt.xlabel('Epochs') 59 | plt.ylabel('Loss') 60 | plt.legend() 61 | 62 | # Add performance metrics and explanations to the plot 63 | metrics_text = f'''Test Loss: {loss:.4f} (Lower is better) 64 | Test Accuracy: {accuracy:.4f} (Higher is better)''' 65 | 66 | plt.gcf().text(0.02, 0.5, metrics_text, fontsize=12, verticalalignment='center', bbox={'facecolor': 'white', 'alpha': 0.7, 'pad': 10}) 67 | 68 | # Add model explanation to the plot 69 | model_explanation = '''This CNN model analyzes financial news headlines to categorize the sentiment as Positive, Negative, or Neutral. 70 | The model is trained on tokenized text data, and uses Conv1D layers to identify local patterns within the text. 
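With a kernel size of 3, each Conv1D filter spans three consecutive word embeddings, so the filters act as learned trigram detectors, and GlobalMaxPooling1D keeps only the strongest activation of each filter across the headline.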
71 | After training, the model is evaluated on a separate test set to assess its predictive accuracy.''' 72 | 73 | plt.gcf().text(0.6, 0.2, model_explanation, fontsize=6, verticalalignment='center', bbox={'facecolor': 'white', 'alpha': 0.7, 'pad': 10}) 74 | 75 | plt.suptitle("Financial News Sentiment Analysis using CNN") 76 | plt.tight_layout(rect=[0, 0.03, 1, 0.95]) 77 | 78 | # Save the plot as a .png file 79 | plt.savefig('Financial_News_Sentiment_Analysis.png') 80 | 81 | plt.show() 82 | -------------------------------------------------------------------------------- /3. Deep Learning Models/unsupervised_deep_learning_models/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manujajay/machinelearning4finance/7a315e218c211633e28f6ed3de3bdde1de5e09d3/3. Deep Learning Models/unsupervised_deep_learning_models/.DS_Store -------------------------------------------------------------------------------- /3. Deep Learning Models/unsupervised_deep_learning_models/3. autoencoders.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from sklearn.datasets import make_classification 4 | from sklearn.preprocessing import StandardScaler 5 | from sklearn.model_selection import train_test_split 6 | from tensorflow.keras.layers import Input, Dense 7 | from tensorflow.keras.models import Model 8 | 9 | # Generate synthetic financial data (replace with real financial data) 10 | X, _ = make_classification(n_samples=1000, n_features=20) 11 | X = StandardScaler().fit_transform(X) 12 | X_train, X_test = train_test_split(X, test_size=0.2, random_state=42) 13 | 14 | # Create autoencoder model 15 | input_layer = Input(shape=(20,)) 16 | encoded = Dense(14, activation='relu')(input_layer) 17 | decoded = Dense(20, activation='sigmoid')(encoded) 18 | 19 | autoencoder = Model(inputs=input_layer, outputs=decoded) 20 | autoencoder.compile(optimizer='adam', loss='mean_squared_error') 21 | 22 | # Train the model 23 | history = autoencoder.fit(X_train, X_train, epochs=50, batch_size=32, validation_split=0.2, verbose=1) 24 | 25 | # Use the trained autoencoder to predict test data 26 | X_test_predictions = autoencoder.predict(X_test) 27 | mse = np.mean(np.power(X_test - X_test_predictions, 2), axis=1) 28 | 29 | # Set a threshold for anomaly detection 30 | threshold = np.quantile(mse, 0.95) 31 | 32 | # Visualize results 33 | fig, axes = plt.subplots(1, 2, figsize=(24, 12)) 34 | 35 | # Plot histogram 36 | axes[0].hist(mse, bins=50, alpha=0.6, color='g', label='Normal') 37 | axes[0].axvline(x=threshold, color='r', linestyle='dashed', linewidth=2, label=f'Anomaly threshold ({threshold:.4f})') 38 | axes[0].set_title("Anomaly Detection using Autoencoder in Finance") 39 | axes[0].set_xlabel("Mean Squared Error (MSE)") 40 | axes[0].set_ylabel("Frequency") 41 | axes[0].legend() 42 | 43 | # Plot MSE over samples 44 | axes[1].plot(mse, label='MSE') 45 | axes[1].axhline(y=threshold, color='r', linestyle='dashed', linewidth=2, label=f'Anomaly threshold ({threshold:.4f})') 46 | axes[1].scatter(np.where(mse > threshold), mse[mse > threshold], color='r', zorder=5, label='Anomalies') 47 | axes[1].set_title("MSE Values Over Test Samples") 48 | axes[1].set_xlabel("Test Sample Index") 49 | axes[1].set_ylabel("Mean Squared Error (MSE)") 50 | axes[1].legend() 51 | 52 | # Add key statistics 53 | stats_text = f'''Key Statistics: 54 | - Training Data Size: {X_train.shape[0]} 55 | - Test 
Data Size: {X_test.shape[0]} 56 | - Anomaly Threshold (95 percentile): {threshold:.4f}''' 57 | 58 | fig.text(0.15, 0.1, stats_text, fontsize=12, bbox={'facecolor': 'white', 'alpha': 0.8, 'pad': 10}) 59 | 60 | # Add model explanation 61 | model_explanation = '''Model Explanation: 62 | Autoencoders can be valuable in detecting anomalies in trading and identifying fraudulent transactions. 63 | By training the autoencoder on 'normal' financial data, it learns to reconstruct similar data efficiently. 64 | Anomalies (unusual patterns) result in higher reconstruction errors (MSE), making them identifiable.''' 65 | 66 | fig.text(0.6, 0.1, model_explanation, fontsize=12, bbox={'facecolor': 'white', 'alpha': 0.8, 'pad': 10}) 67 | 68 | # Save the plot as a PNG file 69 | plt.savefig('Anomaly_Detection_Using_Autoencoder.png') 70 | 71 | plt.show() 72 | -------------------------------------------------------------------------------- /3. Deep Learning Models/unsupervised_deep_learning_models/4. generative_adversarial_networks_(GANs).py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from tensorflow.keras.models import Model 4 | from tensorflow.keras.layers import Input, Dense 5 | from tensorflow.keras.optimizers import Adam 6 | 7 | # Generate some synthetic "real" financial market return data (replace with real data) 8 | np.random.seed(0) 9 | real_data = np.random.normal(0, 1, (1000, 1)) 10 | 11 | # Generator Model 12 | input_noise = Input(shape=(10,)) 13 | hidden_layer_g = Dense(30, activation='relu')(input_noise) 14 | generated_data = Dense(1, activation='linear')(hidden_layer_g) 15 | generator = Model(inputs=input_noise, outputs=generated_data) 16 | 17 | # Discriminator Model 18 | input_real_data = Input(shape=(1,)) 19 | hidden_layer_d = Dense(30, activation='relu')(input_real_data) 20 | validity = Dense(1, activation='sigmoid')(hidden_layer_d) 21 | discriminator = Model(inputs=input_real_data, outputs=validity) 22 | discriminator.compile(loss='binary_crossentropy', optimizer=Adam(), metrics=['accuracy']) 23 | 24 | # GAN Model 25 | discriminator.trainable = False 26 | gan_output = discriminator(generator(input_noise)) 27 | gan = Model(inputs=input_noise, outputs=gan_output) 28 | gan.compile(loss='binary_crossentropy', optimizer=Adam()) 29 | 30 | # Training parameters 31 | epochs = 1000 32 | batch_size = 32 33 | 34 | # Train GAN 35 | for epoch in range(epochs): 36 | # Train Discriminator 37 | noise = np.random.normal(0, 1, (batch_size, 10)) 38 | generated_data = generator.predict(noise) 39 | real_data_batch = real_data[np.random.randint(0, real_data.shape[0], batch_size)] 40 | labels_real = np.ones((batch_size, 1)) 41 | labels_fake = np.zeros((batch_size, 1)) 42 | d_loss_real = discriminator.train_on_batch(real_data_batch, labels_real) 43 | d_loss_fake = discriminator.train_on_batch(generated_data, labels_fake) 44 | d_loss = 0.5 * np.add(d_loss_real, d_loss_fake) 45 | 46 | # Train Generator 47 | noise = np.random.normal(0, 1, (batch_size, 10)) 48 | labels_gan = np.ones((batch_size, 1)) 49 | g_loss = gan.train_on_batch(noise, labels_gan) 50 | 51 | # Generate data to visualize 52 | noise = np.random.normal(0, 1, (1000, 10)) 53 | generated_data = generator.predict(noise) 54 | 55 | # Create the plot 56 | fig, ax = plt.subplots(figsize=(12, 6)) 57 | 58 | ax.hist(real_data, alpha=0.5, label='Real Data') 59 | ax.hist(generated_data, alpha=0.5, label='Generated Data') 60 | ax.set_title('GAN for Simulating Financial 
Market Conditions') 61 | ax.set_xlabel('Market Returns') 62 | ax.set_ylabel('Frequency') 63 | ax.legend() 64 | 65 | # Model Description and Key Stats 66 | description = '''Model Description: 67 | The GAN model consists of a Generator and a Discriminator. 68 | The Generator tries to produce synthetic financial data, while the Discriminator tries to distinguish between real and synthetic data. 69 | After training, we use the Generator to simulate different market conditions for assessing potential risks associated with various investment strategies.''' 70 | 71 | stats = f'''Key Stats: 72 | - Number of epochs: {epochs} 73 | - Batch size: {batch_size} 74 | - Discriminator Loss: {d_loss[0]:.4f} 75 | - Generator Loss: {g_loss:.4f}''' 76 | 77 | fig.text(0.2, 0.55, description, fontsize=6, bbox={'facecolor': 'white', 'alpha': 0.8, 'pad': 10}) 78 | fig.text(0.65, 0.25, stats, fontsize=12, bbox={'facecolor': 'white', 'alpha': 0.8, 'pad': 10}) 79 | 80 | # Save plot 81 | plt.savefig('GAN_Financial_Simulation.png') 82 | plt.show() 83 | -------------------------------------------------------------------------------- /4. Reinforcement Learning Models/Q_Learning_Stock_Trading_YFinance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manujajay/machinelearning4finance/7a315e218c211633e28f6ed3de3bdde1de5e09d3/4. Reinforcement Learning Models/Q_Learning_Stock_Trading_YFinance.png -------------------------------------------------------------------------------- /4. Reinforcement Learning Models/README.md: -------------------------------------------------------------------------------- 1 | ![alt text](Q_Learning_Stock_Trading_YFinance.png) 2 | 3 | 4 | -------------------------------------------------------------------------------- /4. 
Reinforcement Learning Models/q_learning.py: -------------------------------------------------------------------------------- 1 | import yfinance as yf 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | # Download stock data from Yahoo Finance 6 | data = yf.download("AAPL", start="2021-01-01", end="2021-02-01")["Close"] 7 | stock_prices = data.values 8 | dates = data.index 9 | 10 | # Initialize Q-Learning parameters 11 | INITIAL_BALANCE = 1000.0 12 | N_TRADING_DAYS = len(stock_prices) 13 | 14 | # Initialize Q-Table 15 | q_table = np.zeros(3) # Buy, Sell, Hold 16 | 17 | # Initialize records for rewards and balances 18 | balances = [INITIAL_BALANCE] 19 | 20 | # Initialize records for actions 21 | actions = [] 22 | 23 | # Hyperparameters 24 | epsilon = 0.2 # Exploration vs Exploitation 25 | lr = 0.1 # Learning rate 26 | gamma = 0.99 # Discount factor 27 | 28 | # Simulation 29 | for day in range(N_TRADING_DAYS - 1): 30 | state = balances[-1] 31 | stock_price = stock_prices[day] 32 | next_stock_price = stock_prices[day + 1] 33 | 34 | # Epsilon-greedy action selection 35 | action = np.random.randint(3) if np.random.rand() < epsilon else np.argmax(q_table) 36 | 37 | # Reward function 38 | reward = 0 39 | if action == 0: # Buy 40 | reward = next_stock_price - stock_price 41 | elif action == 1: # Sell 42 | reward = stock_price - next_stock_price 43 | 44 | # Q-Learning Update 45 | next_state = state + reward 46 | q_table[action] = q_table[action] + lr * (reward + gamma * np.max(q_table) - q_table[action]) 47 | 48 | # Record action and balance 49 | actions.append(action) 50 | balances.append(next_state) 51 | 52 | # Generate Plot 53 | fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 8)) 54 | 55 | # Stock prices with Buy/Sell markers 56 | buy_dates = [dates[i] for i in range(len(actions)) if actions[i] == 0] 57 | sell_dates = [dates[i] for i in range(len(actions)) if actions[i] == 1] 58 | 59 | ax1.plot(dates, stock_prices, label='Stock Price') 60 | ax1.scatter(buy_dates, [stock_prices[i] for i in range(len(actions)) if actions[i] == 0], marker='^', color='g', label='Buy', zorder=5) 61 | ax1.scatter(sell_dates, [stock_prices[i] for i in range(len(actions)) if actions[i] == 1], marker='v', color='r', label='Sell', zorder=5) 62 | 63 | ax1.set_title("Backtest with Buy/Sell Indicators") 64 | ax1.set_xlabel("Date") 65 | ax1.set_ylabel("Stock Price") 66 | ax1.legend() 67 | 68 | # Balances (balances already holds one entry per trading day, starting at INITIAL_BALANCE, so it aligns with dates as-is) 69 | ax2.plot(dates, balances) 70 | ax2.set_title("Balance Over Time") 71 | ax2.set_xlabel("Date") 72 | ax2.set_ylabel("Balance") 73 | 74 | # Description and Statistics 75 | statistics = f"""Parameters: 76 | - Number of trading days: {N_TRADING_DAYS} 77 | - Learning rate: {lr} 78 | - Discount factor: {gamma} 79 | 80 | Statistics: 81 | - Final Balance: {balances[-1]:.2f} 82 | """ 83 | 84 | model_description = """This Q-learning model simulates stock trading decisions. 85 | It decides whether to buy, sell, or hold based on the history of stock prices. 86 | Green markers (^) indicate buying points, and red markers (v) indicate selling points.""" 87 | 88 | fig.text(0.2, 0.70, statistics, fontsize=7) 89 | fig.text(0.65, 0.70, model_description, fontsize=6) 90 | 91 | plt.suptitle("Stock Trading Simulation using Q-Learning with Yahoo Finance Data") 92 | plt.tight_layout(rect=[0, 0.03, 1, 0.95]) 93 | 94 | # Save plot 95 | plt.savefig("Q_Learning_Stock_Trading_YFinance.png") 96 | 97 | plt.show() 98 | -------------------------------------------------------------------------------- /5. 
ML Applications In Finance/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manujajay/machinelearning4finance/7a315e218c211633e28f6ed3de3bdde1de5e09d3/5. ML Applications In Finance/.DS_Store -------------------------------------------------------------------------------- /5. ML Applications In Finance/01. Risk Management ✅/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manujajay/machinelearning4finance/7a315e218c211633e28f6ed3de3bdde1de5e09d3/5. ML Applications In Finance/01. Risk Management ✅/.DS_Store -------------------------------------------------------------------------------- /5. ML Applications In Finance/01. Risk Management ✅/1. credit_scoring✅.py: -------------------------------------------------------------------------------- 1 | # Use of supervised algorithms to predict the likelihood of a borrower defaulting on a loan. 2 | 3 | ''' 4 | Python script for credit scoring using machine learning. We'll use scikit-learn to create a model that predicts the creditworthiness 5 | of an individual based on some features like income, age, and loan amount. 6 | ''' 7 | 8 | # Import libraries 9 | import pandas as pd 10 | import numpy as np 11 | import matplotlib.pyplot as plt 12 | from sklearn.model_selection import train_test_split 13 | from sklearn.ensemble import RandomForestClassifier 14 | from sklearn.metrics import accuracy_score, confusion_matrix 15 | 16 | # Sample data: [income, age, loan_amount] 17 | # The target variable is 'creditworthy', where 1 means creditworthy and 0 means not creditworthy 18 | data = { 19 | 'income': [50000, 75000, 30000, 100000, 65000, 42000, 120000, 110000, 95000, 67000], 20 | 'age': [25, 45, 35, 50, 23, 33, 55, 40, 48, 20], 21 | 'loan_amount': [25000, 50000, 15000, 100000, 45000, 27000, 80000, 38000, 62000, 20000], 22 | 'creditworthy': [1, 1, 0, 1, 0, 0, 1, 1, 1, 0] 23 | } 24 | 25 | # Convert the dictionary into a DataFrame 26 | df = pd.DataFrame(data) 27 | 28 | # Separate the features (X) from the target variable (y) 29 | X = df[['income', 'age', 'loan_amount']] 30 | y = df['creditworthy'] 31 | 32 | # Plotting data points 33 | plt.figure(figsize=(12, 6)) 34 | 35 | plt.subplot(1, 3, 1) 36 | plt.scatter(df['income'], df['creditworthy'], c=df['creditworthy']) 37 | plt.xlabel('Income') 38 | plt.ylabel('Creditworthy') 39 | plt.title('Income vs Creditworthiness') 40 | 41 | plt.subplot(1, 3, 2) 42 | plt.scatter(df['age'], df['creditworthy'], c=df['creditworthy']) 43 | plt.xlabel('Age') 44 | plt.ylabel('Creditworthy') 45 | plt.title('Age vs Creditworthiness') 46 | 47 | plt.subplot(1, 3, 3) 48 | plt.scatter(df['loan_amount'], df['creditworthy'], c=df['creditworthy']) 49 | plt.xlabel('Loan Amount') 50 | plt.ylabel('Creditworthy') 51 | plt.title('Loan Amount vs Creditworthiness') 52 | 53 | plt.tight_layout() 54 | plt.show() 55 | 56 | # Split the data into training and test sets 57 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) 58 | 59 | # Initialize the Random Forest Classifier 60 | clf = RandomForestClassifier() 61 | 62 | # Train the model 63 | clf.fit(X_train, y_train) 64 | 65 | # Make predictions on the test set 66 | y_pred = clf.predict(X_test) 67 | 68 | # Evaluate the model 69 | accuracy = accuracy_score(y_test, y_pred) 70 | conf_matrix = confusion_matrix(y_test, y_pred) 71 | 72 | print(f'Accuracy: {accuracy}') 73 | print(f'Confusion Matrix: \n{conf_matrix}') 74 | 
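75 | # With only ten samples, a single train/test split is very noisy; k-fold
76 | # cross-validation gives a more stable accuracy estimate. A quick, illustrative
77 | # check on the same toy data (cv=3, because the smaller class has only four members):
78 | from sklearn.model_selection import cross_val_score
79 | cv_scores = cross_val_score(RandomForestClassifier(random_state=42), X, y, cv=3)
80 | print(f'3-fold CV accuracy: {cv_scores.mean():.2f} (+/- {cv_scores.std():.2f})')
81 | 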
82 | # Function to predict if an individual is creditworthy
83 | def predict_creditworthiness(income, age, loan_amount):
84 |     # Wrap the input in a DataFrame with the training column names, so scikit-learn
85 |     # does not warn about missing feature names
86 |     applicant = pd.DataFrame([[income, age, loan_amount]], columns=X.columns)
87 |     prediction = clf.predict(applicant)[0]
88 | 
89 |     if prediction == 1:
90 |         return "The individual is creditworthy."
91 |     else:
92 |         return "The individual is not creditworthy."
93 | 
94 | # Example usage of the prediction function
95 | print(predict_creditworthiness(70000, 30, 40000))  # Should generally return "The individual is creditworthy."
96 | print(predict_creditworthiness(30000, 25, 60000))  # Should generally return "The individual is not creditworthy."
97 | 
98 | '''
99 | Here's what each part of the code does:
100 | 
101 | Import Libraries: The necessary Python libraries for data manipulation and machine learning are imported.
102 | 
103 | Sample Data: We create a DataFrame from a dictionary, where each entry corresponds to an individual's attributes like income, age, and loan amount. The target variable is creditworthy, where 1 means the individual is creditworthy and 0 means they are not.
104 | 
105 | Data Splitting: We divide the data into a training set and a test set using the train_test_split() function from scikit-learn.
106 | 
107 | Model Initialization and Training: We use a Random Forest Classifier to train our model on the training set.
108 | 
109 | Evaluation: We evaluate the model using the test set and print out the accuracy and confusion matrix.
110 | 
111 | Prediction Function: We define a function predict_creditworthiness() that uses our trained model to predict whether an individual is creditworthy based on their income, age, and loan amount.
112 | 
113 | Example Usage: We call the predict_creditworthiness() function with sample data to demonstrate how to use it.
114 | 
115 | Remember that this is just a basic example. In a real-world application, you'd need a much larger dataset and you'd also spend time tuning the model and perhaps using more sophisticated methods for evaluation.
116 | ''' -------------------------------------------------------------------------------- /5. ML Applications In Finance/01. Risk Management ✅/2. value-at-risk_modeling✅.py: --------------------------------------------------------------------------------
1 | # Estimation of the potential losses an investment portfolio could face over a specified period for a given confidence interval.
2 | 
3 | '''
4 | One common approach is to use a machine learning model to predict future returns, and then calculate VaR based on these predictions.
5 | Below is an example using a Random Forest model to predict future stock returns and subsequently calculate VaR. 
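6 | 
7 | Throughout, VaR is read off as a lower quantile of the predicted-return distribution:
8 | VaR = quantile(predicted_returns, tail_prob), with tail_prob = 0.05,
9 | i.e. the model's estimate is that on roughly 95% of days the daily return should not fall below this level.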
10 | '''
11 | 
12 | # Import libraries
13 | import numpy as np
14 | import pandas as pd
15 | import matplotlib.pyplot as plt
16 | from sklearn.ensemble import RandomForestRegressor
17 | from sklearn.model_selection import train_test_split
18 | from sklearn.metrics import mean_squared_error
19 | 
20 | # Generate synthetic stock returns data
21 | # In a real-world application, you would fetch this data from a reliable source
22 | np.random.seed(42)
23 | n_data_points = 1000
24 | stock_returns = np.random.normal(0, 1, n_data_points)
25 | 
26 | # Create a DataFrame
27 | df = pd.DataFrame(stock_returns, columns=['Returns'])
28 | 
29 | # Feature engineering: use lagged returns as features
30 | for i in range(1, 6):
31 |     df[f'Lag_{i}'] = df['Returns'].shift(i)
32 | 
33 | # Remove NaN
34 | df = df.dropna()
35 | 
36 | # Split into features (X) and target (y)
37 | X = df.drop('Returns', axis=1)
38 | y = df['Returns']
39 | 
40 | # Split data into training and test sets
41 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
42 | 
43 | # Initialize and train the Random Forest model
44 | model = RandomForestRegressor(n_estimators=100, random_state=42)
45 | model.fit(X_train, y_train)
46 | 
47 | # Make predictions
48 | y_pred = model.predict(X_test)
49 | 
50 | # Evaluate the model
51 | mse = mean_squared_error(y_test, y_pred)
52 | print(f'Mean Squared Error: {mse}')
53 | 
54 | # Calculate VaR
55 | tail_prob = 0.05  # tail probability; the VaR confidence level is 1 - tail_prob = 95%
56 | VaR = np.quantile(y_pred, tail_prob)
57 | 
58 | print(f'Value-at-Risk (VaR) at {(1 - tail_prob) * 100:.0f}% confidence is {VaR:.4f}')
59 | 
60 | # Plot predicted returns and VaR
61 | plt.figure(figsize=(10, 6))
62 | plt.hist(y_pred, bins=30, alpha=0.75, color='blue', label='Predicted Returns')
63 | plt.axvline(x=VaR, color='r', linestyle='--', label=f'VaR at {(1 - tail_prob) * 100:.0f}% confidence')
64 | plt.xlabel('Predicted Return')
65 | plt.ylabel('Frequency')
66 | plt.title('Value-at-Risk (VaR) using Machine Learning')
67 | plt.legend()
68 | plt.show()
69 | 
70 | 
71 | '''
72 | Explanation:
73 | 
74 | Generate Synthetic Stock Returns: The code generates synthetic stock return data for demonstration purposes.
75 | 
76 | Feature Engineering: Lagged returns are used as features for the machine learning model.
77 | 
78 | Train-Test Split: The data is split into training and test sets.
79 | 
80 | Random Forest Model: A Random Forest Regressor model is trained on the data.
81 | 
82 | Prediction: The model predicts future returns on the test set.
83 | 
84 | Evaluation: The model is evaluated using Mean Squared Error (MSE).
85 | 
86 | Calculate VaR: VaR is taken as the 5% quantile of the predicted returns, computed with numpy's quantile function.
87 | 
88 | Plot: The predicted returns and VaR are plotted.
89 | 
90 | The red line in the plot marks the VaR at a 95% confidence level: according to this model, on 95% of days the daily return should not fall below this value.
91 | 
92 | Note: This is a very simplified example for demonstration purposes. In a real-world scenario, the data would be more complex, and additional steps such as data normalization, hyperparameter tuning, and validation would be necessary. Also note that the quantiles of model predictions are typically narrower than the quantiles of realized returns, so this approach tends to understate risk.
93 | ''' -------------------------------------------------------------------------------- /5. ML Applications In Finance/01. Risk Management ✅/3. fraud_detection✅.py: --------------------------------------------------------------------------------
1 | # Anomaly detection to identify unusual patterns which could suggest fraudulent transactions. 
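# Note: despite the header comment above, the demo below is supervised classification
# on labeled data. When fraud labels are not available, an unsupervised anomaly
# detector is the usual choice; a minimal sketch (illustrative, not part of the
# original script), assuming a feature matrix X of transaction features:
#
#     from sklearn.ensemble import IsolationForest
#     iso = IsolationForest(contamination=0.05, random_state=42)
#     flags = iso.fit_predict(X)  # -1 = anomaly (possible fraud), 1 = normal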
2 | 3 | ''' 4 | a Python script that demonstrates a simple approach to fraud detection using machine learning. 5 | In this example, we'll use the RandomForestClassifier from scikit-learn to classify transactions as either "fraudulent" or "genuine". 6 | ''' 7 | 8 | # Import Libraries 9 | import numpy as np 10 | import pandas as pd 11 | import matplotlib.pyplot as plt 12 | from sklearn.model_selection import train_test_split 13 | from sklearn.ensemble import RandomForestClassifier 14 | from sklearn.metrics import accuracy_score, confusion_matrix, classification_report 15 | from sklearn.preprocessing import StandardScaler 16 | 17 | # Generate synthetic data for demonstration 18 | # In a real-world application, replace this with actual data 19 | np.random.seed(42) 20 | n_samples = 1000 21 | 22 | # Genuine transactions are centered around (0, 0) 23 | genuine = np.random.normal(0, 1, (int(n_samples * 0.95), 2)) 24 | genuine_labels = np.zeros(int(n_samples * 0.95)) 25 | 26 | # Fraudulent transactions are centered around (5, 5) 27 | fraud = np.random.normal(5, 1, (int(n_samples * 0.05), 2)) 28 | fraud_labels = np.ones(int(n_samples * 0.05)) 29 | 30 | # Combine into one dataset 31 | X = np.vstack([genuine, fraud]) 32 | y = np.hstack([genuine_labels, fraud_labels]) 33 | 34 | # Data Preprocessing: Feature Scaling 35 | scaler = StandardScaler() 36 | X = scaler.fit_transform(X) 37 | 38 | # Split data into training and test sets 39 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y) 40 | 41 | # Initialize and train RandomForest Classifier 42 | clf = RandomForestClassifier(random_state=42) 43 | clf.fit(X_train, y_train) 44 | 45 | # Predict on test set 46 | y_pred = clf.predict(X_test) 47 | 48 | # Evaluation Metrics 49 | print(f'Accuracy: {accuracy_score(y_test, y_pred)}') 50 | print(f'Confusion Matrix: \n{confusion_matrix(y_test, y_pred)}') 51 | print(f'Classification Report: \n{classification_report(y_test, y_pred)}') 52 | 53 | # Plotting 54 | plt.figure(figsize=(10, 6)) 55 | 56 | # Plot genuine transactions 57 | plt.scatter(X_test[y_test == 0][:, 0], X_test[y_test == 0][:, 1], label='Genuine', alpha=0.5) 58 | 59 | # Plot fraudulent transactions 60 | plt.scatter(X_test[y_test == 1][:, 0], X_test[y_test == 1][:, 1], label='Fraud', alpha=0.5) 61 | 62 | # Highlight false negatives 63 | plt.scatter(X_test[(y_test == 1) & (y_pred == 0)][:, 0], X_test[(y_test == 1) & (y_pred == 0)][:, 1], s=100, 64 | facecolors='none', edgecolors='r', label='False Negative') 65 | 66 | # Highlight false positives 67 | plt.scatter(X_test[(y_test == 0) & (y_pred == 1)][:, 0], X_test[(y_test == 0) & (y_pred == 1)][:, 1], s=100, 68 | facecolors='none', edgecolors='m', label='False Positive') 69 | 70 | plt.xlabel('Feature 1') 71 | plt.ylabel('Feature 2') 72 | plt.title('Fraud Detection') 73 | plt.legend() 74 | plt.show() 75 | 76 | ''' 77 | Explanation: 78 | 79 | Data Generation: We're creating synthetic data for both genuine and fraudulent transactions. In practice, you would replace this with your actual data. 80 | 81 | Feature Scaling: Using the StandardScaler from scikit-learn to normalize features, which is often necessary for machine learning algorithms. 82 | 83 | Train-Test Split: We're splitting the data into training and test sets, with 20% of the data reserved for testing. 84 | 85 | Random Forest Classifier: A simple Random Forest model is trained on the training data. 
86 | 87 | Prediction and Evaluation: We then use the trained model to make predictions on the test set, and print evaluation metrics like accuracy, confusion matrix, and classification report. 88 | 89 | Plotting: Finally, we plot the test data, highlighting genuine and fraudulent transactions. We also indicate false positives and false negatives. 90 | 91 | This is a simplified example meant for demonstration. Real-world fraud detection models would involve far more complexity, such as dealing with imbalanced data, feature engineering, hyperparameter tuning, and possibly using more advanced algorithms. 92 | ''' -------------------------------------------------------------------------------- /5. ML Applications In Finance/01. Risk Management ✅/4. operational_risk_modeling✅.py: -------------------------------------------------------------------------------- 1 | # Predictive models to identify potential operational hazards and risks. 2 | 3 | ''' 4 | a Python script that demonstrates how to use machine learning to predict operational hazards based on synthetic features. 5 | 6 | In this example, I'll use a RandomForestClassifier from scikit-learn to create a predictive model. 7 | The target variable is a binary outcome representing whether or not an operational hazard exists (1 for hazard, 0 for no hazard). 8 | ''' 9 | 10 | # Import Libraries 11 | import numpy as np 12 | import pandas as pd 13 | import matplotlib.pyplot as plt 14 | from sklearn.model_selection import train_test_split 15 | from sklearn.ensemble import RandomForestClassifier 16 | from sklearn.metrics import accuracy_score, confusion_matrix, classification_report 17 | from sklearn.preprocessing import StandardScaler 18 | 19 | # Generate synthetic data for demonstration 20 | # In a real-world application, you would use actual data 21 | np.random.seed(42) 22 | n_samples = 1000 23 | 24 | # Non-hazardous situations with features centered around (2, 2) 25 | non_hazard = np.random.normal(2, 1, (int(n_samples * 0.7), 2)) 26 | non_hazard_labels = np.zeros(int(n_samples * 0.7)) 27 | 28 | # Hazardous situations with features centered around (5, 5) 29 | hazard = np.random.normal(5, 1, (int(n_samples * 0.3), 2)) 30 | hazard_labels = np.ones(int(n_samples * 0.3)) 31 | 32 | # Combine into one dataset 33 | X = np.vstack([non_hazard, hazard]) 34 | y = np.hstack([non_hazard_labels, hazard_labels]) 35 | 36 | # Feature Scaling 37 | scaler = StandardScaler() 38 | X = scaler.fit_transform(X) 39 | 40 | # Split into training and test sets 41 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y) 42 | 43 | # Initialize and train the RandomForestClassifier 44 | clf = RandomForestClassifier(random_state=42) 45 | clf.fit(X_train, y_train) 46 | 47 | # Make predictions 48 | y_pred = clf.predict(X_test) 49 | 50 | # Evaluation Metrics 51 | print(f'Accuracy: {accuracy_score(y_test, y_pred)}') 52 | print(f'Confusion Matrix: \n{confusion_matrix(y_test, y_pred)}') 53 | print(f'Classification Report: \n{classification_report(y_test, y_pred)}') 54 | 55 | # Plotting the results 56 | plt.figure(figsize=(10, 6)) 57 | 58 | # Plot non-hazardous situations 59 | plt.scatter(X_test[y_test == 0][:, 0], X_test[y_test == 0][:, 1], label='Non-Hazard', alpha=0.6) 60 | 61 | # Plot hazardous situations 62 | plt.scatter(X_test[y_test == 1][:, 0], X_test[y_test == 1][:, 1], label='Hazard', alpha=0.6) 63 | 64 | # Highlight false negatives 65 | plt.scatter(X_test[(y_test == 1) & (y_pred == 0)][:, 0], X_test[(y_test == 1) & (y_pred == 
0)][:, 1], s=100,
66 |             facecolors='none', edgecolors='r', label='False Negative')
67 | 
68 | # Highlight false positives
69 | plt.scatter(X_test[(y_test == 0) & (y_pred == 1)][:, 0], X_test[(y_test == 0) & (y_pred == 1)][:, 1], s=100,
70 |             facecolors='none', edgecolors='m', label='False Positive')
71 | 
72 | plt.xlabel('Feature 1')
73 | plt.ylabel('Feature 2')
74 | plt.title('Operational Hazard Prediction')
75 | plt.legend()
76 | plt.show()
77 | 
78 | '''
79 | Explanation:
80 | 
81 | Data Generation: Synthetic data is generated to simulate operational hazards. In practice, you would use actual operational data with relevant features.
82 | 
83 | Feature Scaling: Features are scaled using StandardScaler from scikit-learn. This is often necessary for machine learning algorithms.
84 | 
85 | Train-Test Split: The dataset is split into a training set and a test set.
86 | 
87 | Random Forest Classifier: A RandomForestClassifier is trained on the training set.
88 | 
89 | Prediction and Evaluation: The model is used to make predictions on the test set, and several evaluation metrics are printed.
90 | 
91 | Plotting: A scatter plot of the test set shows hazardous and non-hazardous points, with false positives and false negatives highlighted. This provides a visual insight into how well the model is performing.
92 | 
93 | This is a simplified example intended for demonstration purposes. A real-world application would include more steps like feature engineering, dealing with imbalanced data, hyperparameter tuning, and perhaps the use of more advanced machine learning algorithms.
94 | ''' -------------------------------------------------------------------------------- /5. ML Applications In Finance/02. Asset Management ✅/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manujajay/machinelearning4finance/7a315e218c211633e28f6ed3de3bdde1de5e09d3/5. ML Applications In Finance/02. Asset Management ✅/.DS_Store -------------------------------------------------------------------------------- /5. ML Applications In Finance/02. Asset Management ✅/1. portfolio_optimization✅.py: --------------------------------------------------------------------------------
1 | # Models to maximize returns for a given level of risk.
2 | 
3 | '''
4 | Portfolio optimization involves selecting the mix of investment assets that is statistically likely to achieve a desired
5 | return for a given level of risk; the classic method is Mean-Variance Optimization. The Streamlit app below estimates
6 | mean returns and covariances from historical yfinance data, then solves for the long-only weights w (summing to 1) that
7 | maximize the annualized Sharpe ratio S(w) = 252 * (w . mu) / sqrt(252 * w' Sigma w), taking the risk-free rate as zero. 
8 | ''' 9 | 10 | import streamlit as st 11 | import yfinance as yf 12 | import numpy as np 13 | import pandas as pd 14 | import matplotlib.pyplot as plt 15 | from scipy.optimize import minimize 16 | from pandas_datareader import data as pdr 17 | import datetime 18 | import concurrent.futures 19 | 20 | # Fetch stock names using multi-threading 21 | def get_stock_name(ticker): 22 | stock = yf.Ticker(ticker) 23 | info = stock.info 24 | return info.get('shortName') or info.get('longName') or ticker 25 | 26 | def fetch_all_stock_names(tickers): 27 | ticker_to_name = {} 28 | with concurrent.futures.ThreadPoolExecutor() as executor: 29 | future_to_ticker = {executor.submit(get_stock_name, ticker): ticker for ticker in tickers} 30 | for future in concurrent.futures.as_completed(future_to_ticker): 31 | ticker = future_to_ticker[future] 32 | try: 33 | ticker_to_name[ticker] = future.result() 34 | except Exception as e: 35 | print(f"Could not fetch name for {ticker}: {e}") 36 | ticker_to_name[ticker] = ticker 37 | return ticker_to_name 38 | 39 | # Streamlit title and setup 40 | st.title("Portfolio Optimization App") 41 | 42 | # Timeframe 43 | timeframe = st.selectbox('Select Timeframe:', ['1Y', '2Y', '3Y']) 44 | start_date = str((datetime.datetime.now() - pd.DateOffset(years=int(timeframe[0]))).date()) 45 | end_date = str(datetime.datetime.now().date()) 46 | 47 | # Categorized Tickers 48 | all_tickers = { 49 | 'Stocks': ["AAPL", "MSFT", "GOOGL", "AMZN", "TSLA"], 50 | 'Stock ETFs': ["SPY", "QQQ", "EFA", "IWM", "EEM"], 51 | 'Commodities': ["GLD", "SLV", "USO"], 52 | 'Bond ETFs': ["TLT"], 53 | 'Real Estate ETFs': ["VNQ"], 54 | 'Highlighted ETFs': ["0P0000XMRD.L", "0P0000KSPA.L", "0P000023MW.L", "0P000185T1.L", "0P0000TKZG.L"], 55 | 'Highlighted Stocks': ["NTDOY", "PLTK", "INSE", "SCPL", "EA"] 56 | } 57 | 58 | all_tickers_flat = [item for sublist in all_tickers.values() for item in sublist] 59 | ticker_to_name = fetch_all_stock_names(all_tickers_flat) 60 | 61 | # Selection 62 | selected_tickers = [] 63 | for category, tickers in all_tickers.items(): 64 | st.write(f"## {category}") 65 | selected = st.multiselect('', tickers, format_func=lambda x: f"{ticker_to_name[x]} ({x})") 66 | selected_tickers.extend(selected) 67 | 68 | # Make sure SPY is in the selected_tickers for benchmarking 69 | if 'SPY' not in selected_tickers: 70 | selected_tickers.append('SPY') 71 | 72 | # Download stock data 73 | def download_data(ticker_list, start_date, end_date): 74 | data = yf.download(ticker_list, start=start_date, end=end_date)['Adj Close'] 75 | return data.pct_change().dropna() 76 | 77 | # Portfolio optimization 78 | def optimize_portfolio(returns): 79 | def objective(weights): 80 | portfolio_return = np.sum(returns.mean() * weights) * 252 81 | portfolio_volatility = np.sqrt(np.dot(weights.T, np.dot(returns.cov() * 252, weights))) 82 | return -portfolio_return / portfolio_volatility 83 | 84 | initial_weights = [1. 
/ len(returns.columns)] * len(returns.columns)
85 |     bounds = tuple((0, 1) for asset in range(len(returns.columns)))
86 |     constraints = [{'type': 'eq', 'fun': lambda weights: np.sum(weights) - 1}]
87 | 
88 |     solution = minimize(objective, initial_weights, bounds=bounds, constraints=constraints)
89 |     return solution.x
90 | 
91 | # Download data
92 | data = download_data(selected_tickers, start_date, end_date)
93 | 
94 | # Optimize portfolio
95 | optimal_weights = optimize_portfolio(data)
96 | 
97 | # Display Results
98 | st.write("## Final Portfolio Allocation:")
99 | allocation_df = pd.DataFrame({
100 |     'Asset': [ticker_to_name[ticker] for ticker in selected_tickers],
101 |     'Ticker': selected_tickers,
102 |     'Weights': optimal_weights
103 | })
104 | st.table(allocation_df)
105 | 
106 | # Pie Chart (Only include assets with at least 1% weight)
107 | significant_weights = optimal_weights[optimal_weights >= 0.01]
108 | significant_tickers = np.array(selected_tickers)[optimal_weights >= 0.01]
109 | 
110 | st.write("## Portfolio Allocation Chart:")
111 | fig, ax = plt.subplots()
112 | ax.pie(significant_weights, labels=[ticker_to_name[ticker] for ticker in significant_tickers], autopct='%1.1f%%')
113 | ax.axis('equal')
114 | st.pyplot(fig)
115 | 
116 | # Performance vs. SPY
117 | cumulative_portfolio_return = (data * optimal_weights).sum(axis=1).add(1).cumprod().sub(1)
118 | cumulative_spy_return = data['SPY'].add(1).cumprod().sub(1)
119 | 
120 | fig, ax = plt.subplots()
121 | cumulative_portfolio_return.plot(ax=ax, label='Portfolio')
122 | cumulative_spy_return.plot(ax=ax, label='SPY')
123 | plt.legend()
124 | plt.title("Portfolio Performance vs. SPY")
125 | st.pyplot(fig)
126 | 
127 | # Calculate Sharpe Ratio
128 | risk_free_data = pdr.get_data_fred('GS3M', start_date, end_date)
129 | risk_free_data_monthly = risk_free_data.resample('M').mean()
130 | risk_free_data_monthly.interpolate(method='linear', inplace=True)
131 | risk_free_data_aligned = risk_free_data_monthly.reindex(data.index, method='pad') / 100 / 252
132 | 
133 | portfolio_return = (data * optimal_weights).sum(axis=1)
134 | excess_portfolio_return = portfolio_return.sub(risk_free_data_aligned['GS3M'].squeeze(), axis=0)
135 | 
136 | sharpe_ratio = np.sqrt(252) * (excess_portfolio_return.mean() / excess_portfolio_return.std())
137 | st.write(f"## Annualized Sharpe Ratio: {sharpe_ratio:.4f}")
138 | 
139 | 
140 | # Note: This is a simplified example. Always consult with a financial advisor before making any investment decisions.
141 | 
142 | # To run, do:
143 | # "streamlit run 1.\ portfolio_optimization.py"
144 | 
145 | 
146 | 
147 | '''
148 | Explanation:
149 | 
150 | Data: Historical adjusted closes are downloaded from yfinance for the selected tickers and converted into daily returns.
151 | (UPDATE: the original synthetic-data / Random Forest version was replaced with live yfinance data.)
152 | 
153 | User Input: The user selects a timeframe (1-3 years) and the assets to include; SPY is always added as a benchmark.
154 | 
155 | Optimization: The weights are optimized to maximize the annualized Sharpe ratio computed from historical mean returns and
156 | covariances. This is done using the minimize function from scipy's optimize module, with long-only bounds and a constraint
157 | that the weights sum to one.
158 | 
159 | Output: Finally, the app displays the optimal allocation as a table and a pie chart, plots cumulative portfolio returns
160 | against SPY, and reports an annualized Sharpe ratio using the 3-month Treasury yield (FRED series GS3M) as the risk-free rate.
161 | ''' -------------------------------------------------------------------------------- /5. ML Applications In Finance/02. Asset Management ✅/2. algorithmic_trading✅.py: --------------------------------------------------------------------------------
1 | # Utilizing algorithms and quantitative models to execute trades at optimal prices.
2 | # Algorithmic Trading Script Using Random Forest
3 | 
4 | import pandas as pd
5 | import numpy as np
6 | import matplotlib.pyplot as plt
7 | import yfinance as yf
8 | from sklearn.model_selection import train_test_split
9 | from sklearn.ensemble import RandomForestClassifier
10 | from sklearn.metrics import accuracy_score
11 | 
12 | # Fetching data using yfinance
13 | ticker = 'AAPL'
14 | start_date = '2010-01-01'
15 | end_date = '2023-01-01'
16 | 
17 | data = yf.download(ticker, start=start_date, end=end_date)
18 | 
19 | # Feature Engineering
20 | 
21 | data['Close_Lag1'] = data['Close'].shift(1)
22 | data['Return'] = (data['Close'] - data['Close_Lag1']) / data['Close_Lag1']
23 | data['MA5'] = data['Close'].rolling(window=5).mean()
24 | data['MA10'] = data['Close'].rolling(window=10).mean()
25 | data['MA_Diff'] = data['MA5'] - data['MA10']
26 | data['Momentum'] = data['Close'] - data['Close'].shift(4)
27 | data['Volatility'] = data['Return'].rolling(window=5).std()
28 | data['Next_Return'] = data['Return'].shift(-1)  # return realized on the following day
29 | data['Target'] = (data['Next_Return'] > 0).astype(int)  # label = NEXT day's direction, so features at day t never contain the answer
30 | data.dropna(inplace=True)  # drops rolling-window NaNs and the final row, whose next-day label is unknown
31 | 
32 | features = ['Close', 'Close_Lag1', 'MA5', 'MA10', 'MA_Diff', 'Momentum', 'Volatility']
33 | X = data[features]
34 | y = data['Target']
35 | 
36 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)
37 | 
38 | rf_clf = RandomForestClassifier(n_estimators=100)
39 | rf_clf.fit(X_train, y_train)
40 | 
41 | y_pred = rf_clf.predict(X_test)
42 | accuracy = accuracy_score(y_test, y_pred)
43 | print(f"Accuracy [Random Forest]: {accuracy:.2f}")
44 | 
45 | data['RF_Predicted_Signal'] = np.nan
46 | data.iloc[(len(data) - len(y_pred)):, data.columns.get_loc('RF_Predicted_Signal')] = y_pred
47 | data['RF_Strategy_Return'] = data['Next_Return'] * (data['RF_Predicted_Signal'] * 2 - 1)  # position (+1 long / -1 short) applied to the next day's return
48 | data['RF_Cumulative_Strategy_Returns'] = (1 + data['RF_Strategy_Return']).cumprod()
49 | 
50 | # Benchmark over the same test window, so both curves start from 1
51 | test_mask = data['RF_Predicted_Signal'].notna()
52 | data.loc[test_mask, 'Cumulative_Market_Returns'] = (1 + data.loc[test_mask, 'Next_Return']).cumprod()
53 | 
54 | # Plot
55 | fig, ax = plt.subplots(figsize=(15, 10))
56 | 
57 | # Plot stock prices
58 | ax.plot(data.index, data['Close'], color='g', label='Stock Price', alpha=0.5)
59 | 
60 | # Plot strategy and market returns
61 | test_data_start = data.iloc[len(data) - len(y_pred):].index[0]
62 | ax.plot(data.loc[test_data_start:]['RF_Cumulative_Strategy_Returns'], color='b', label='Random Forest Strategy Returns')
63 | ax.plot(data.loc[test_data_start:]['Cumulative_Market_Returns'], color='r', label='Buy and Hold Returns')
64 | ax.legend(loc="upper left")
65 | ax.set_ylabel('Value')
66 | 
67 | plt.tight_layout()
68 | plt.show()
69 | -------------------------------------------------------------------------------- /5. ML Applications In Finance/02. Asset Management ✅/3. index_tracking✅.py: --------------------------------------------------------------------------------
1 | # Algorithms to create a portfolio that closely follows a particular index.
2 | 
3 | '''
4 | Creating a portfolio that tracks an index is the basis for index funds and ETFs. One way to do this is by finding the
5 | optimal weights of the stocks in the index such that the tracking error is minimized.
6 | 
7 | In this script, I'll outline the following steps:
8 | 
9 | 1. Fetch the historical data of stocks in the index using yfinance.
10 | 2. Calculate the returns of each stock.
11 | 3. Define an optimization problem to find the optimal weights that minimize the tracking error.
12 | 4. Plot the actual index performance vs. our portfolio's performance.
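13 | 
14 | The objective minimized below is the tracking error TE(w) = sum_t (sum_i w_i * r_{i,t} - r_{index,t})^2,
15 | the sum of squared daily differences between the weighted portfolio return and the index return.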
16 | '''
17 | 
18 | import yfinance as yf
19 | import numpy as np
20 | import pandas as pd
21 | import matplotlib.pyplot as plt
22 | from scipy.optimize import minimize
23 | 
24 | # 1. Fetch Data
25 | # Let's say we want to track the S&P 500. We'll take a subset of companies for simplicity.
26 | tickers = ['AAPL', 'MSFT', 'GOOGL', '^GSPC']  # ^GSPC is the ticker for S&P 500
27 | data = yf.download(tickers, start="2020-01-01", end="2023-01-01")['Adj Close']
28 | 
29 | # 2. Calculate Returns
30 | returns = data.pct_change().dropna()
31 | 
32 | # 3. Optimization
33 | def tracking_error(weights: np.array) -> float:
34 |     # Calculate the portfolio returns given the weights
35 |     port_returns = returns.iloc[:, :-1].dot(weights)
36 |     # Calculate the tracking error
37 |     error = np.sum((port_returns - returns['^GSPC'])**2)
38 |     return error
39 | 
40 | # Constraints and bounds
41 | cons = ({'type': 'eq', 'fun': lambda x: np.sum(x) - 1})
42 | bounds = [(0, 1) for _ in range(returns.shape[1] - 1)]  # the number of stocks minus one for the S&P 500 index column
43 | 
44 | # Minimize the tracking error, starting from equal weights across the tracked stocks
45 | initial_guess = [1. / (len(tickers) - 1) for _ in tickers[:-1]]
46 | result = minimize(tracking_error, initial_guess, bounds=bounds, constraints=cons)
47 | 
48 | # Extract the optimal weights
49 | optimal_weights = result.x
50 | 
51 | # 4. Plot
52 | # Calculate portfolio with optimal weights
53 | data['Portfolio'] = data.iloc[:, :-1].dot(optimal_weights)
54 | normalized_data = data / data.iloc[0]  # Normalize data for better visualization
55 | 
56 | plt.figure(figsize=(14, 7))
57 | normalized_data['^GSPC'].plot(label='S&P 500')
58 | normalized_data['Portfolio'].plot(label='Tracked Portfolio')
59 | plt.title('Index Tracking')
60 | plt.xlabel('Date')
61 | plt.ylabel('Normalized Value')
62 | plt.legend()
63 | plt.grid(True)
64 | plt.show()
65 | 
66 | # Display optimal weights
67 | print("Optimal Weights:", optimal_weights)
68 | -------------------------------------------------------------------------------- /5. ML Applications In Finance/02. Asset Management ✅/4. pairs_trading✅.py: --------------------------------------------------------------------------------
1 | # Identifying pairs of assets whose prices have a statistical relationship, used for arbitrage
2 | 
3 | '''
4 | Pairs trading is a strategy that identifies pairs of assets (typically stocks) whose prices are historically correlated.
5 | When their prices deviate substantially, one stock is shorted while the other is bought, with the expectation that the two prices will converge again.
6 | 
7 | Here's a basic outline for a Pairs Trading strategy:
8 | 
9 | 1. Data Collection: Fetch historical data for a set of potential pairs.
10 | 2. Pair Selection: Identify pairs with a strong statistical relationship.
11 | 3. Signal Generation: Determine entry (long/short) and exit points based on a Z-score of the spread.
12 | 4. Trade Execution & Management: Execute the trades and manage the positions.
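13 | 
14 | Concretely, the trading signal below is the z-score of the spread s_t = S1_t - S2_t, i.e. z_t = (s_t - mean(s)) / std(s);
15 | we enter when z_t crosses the entry threshold (long if z < -1.5, short if z > +1.5) and exit once |z_t| < 0.5.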
16 | '''
17 | 
18 | import yfinance as yf
19 | import numpy as np
20 | import pandas as pd
21 | import matplotlib.pyplot as plt
22 | from statsmodels.tsa.stattools import coint
23 | 
24 | # 1. Data Collection
25 | tickers = ['DAL', 'AAL', 'UAL', 'LUV']  # airline stocks as an example
26 | data = yf.download(tickers, start="2020-01-01", end="2023-01-01")['Adj Close']
27 | data = data.ffill()  # forward-fill missing values (fillna(method='ffill') is deprecated in newer pandas)
28 | 
29 | # 2. Pair Selection
30 | def find_cointegrated_pairs(data, pvalue_threshold=0.1):  # Adjusted threshold
31 |     n = data.shape[1]
32 |     score_matrix = np.zeros((n, n))
33 |     pvalue_matrix = np.ones((n, n))
34 |     keys = data.keys()
35 |     pairs = []
36 |     for i in range(n):
37 |         for j in range(i+1, n):
38 |             S1 = data[keys[i]]
39 |             S2 = data[keys[j]]
40 |             result = coint(S1, S2)
41 |             score = result[0]
42 |             pvalue = result[1]
43 |             score_matrix[i, j] = score
44 |             pvalue_matrix[i, j] = pvalue
45 |             if pvalue < pvalue_threshold:  # P-value threshold
46 |                 pairs.append((keys[i], keys[j]))
47 |     return score_matrix, pvalue_matrix, pairs
48 | 
49 | _, _, pairs = find_cointegrated_pairs(data)
50 | print("Cointegrated pairs:", pairs)
51 | 
52 | # Check if we have any cointegrated pairs before proceeding
53 | if not pairs:
54 |     print("No cointegrated pairs found!")
55 |     exit()
56 | 
57 | # For demonstration, let's use the first cointegrated pair.
58 | S1 = data[pairs[0][0]]
59 | S2 = data[pairs[0][1]]
60 | 
61 | # Calculate the spread
62 | spread = S1 - S2
63 | spread_mean = spread.mean()
64 | spread_std = spread.std()
65 | 
66 | # 3. Signal Generation
67 | zscore = (spread - spread_mean) / spread_std
68 | entry_threshold = 1.5
69 | exit_threshold = 0.5
70 | 
71 | # Go long the spread when the z-score crosses down through -entry_threshold;
72 | # go short when it crosses up through +entry_threshold; exit once |z-score| < exit_threshold.
73 | longs = (zscore < -entry_threshold) & (zscore.shift(1) > -entry_threshold)
74 | shorts = (zscore > entry_threshold) & (zscore.shift(1) < entry_threshold)
75 | exits = (np.abs(zscore) < exit_threshold)
76 | 
77 | # 4. Plotting
78 | plt.figure(figsize=(15,7))
79 | 
80 | S1[longs].plot(marker='^', markersize=10, color='g', linestyle='None', alpha=0.7, label='Buy Signal')
81 | S1[shorts].plot(marker='v', markersize=10, color='r', linestyle='None', alpha=0.7, label='Sell Signal')
82 | S1[exits].plot(marker='o', markersize=6, color='b', linestyle='None', alpha=0.7, label='Exit Signal')
83 | S1.plot(color='b', label=pairs[0][0])
84 | S2.plot(color='c', label=pairs[0][1])
85 | plt.xlabel('Date')
86 | plt.ylabel('Price')
87 | plt.title('Pairs Trading')
88 | plt.legend()  # labels are attached to each artist above, so the legend maps markers to the right names
89 | plt.show()
90 | 
91 | -------------------------------------------------------------------------------- /5. ML Applications In Finance/02. Asset Management ✅/requirements.txt: --------------------------------------------------------------------------------
1 | streamlit
2 | yfinance
3 | numpy
4 | pandas
5 | matplotlib
6 | scipy
7 | pandas_datareader -------------------------------------------------------------------------------- /5. ML Applications In Finance/03. Market Analysis And Prediction/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manujajay/machinelearning4finance/7a315e218c211633e28f6ed3de3bdde1de5e09d3/5. ML Applications In Finance/03. Market Analysis And Prediction/.DS_Store -------------------------------------------------------------------------------- /5. ML Applications In Finance/03. Market Analysis And Prediction/1. 
price_forecasting.py: -------------------------------------------------------------------------------- 1 | # Using time-series models to predict future stock prices or market trends. -------------------------------------------------------------------------------- /5. ML Applications In Finance/03. Market Analysis And Prediction/2. sentiment_analysis.py: -------------------------------------------------------------------------------- 1 | # Analyzing news articles or social media to gauge market sentiment. -------------------------------------------------------------------------------- /5. ML Applications In Finance/03. Market Analysis And Prediction/3. option_pricing.py: -------------------------------------------------------------------------------- 1 | # Using computational methods to fair-value options and other derivatives. -------------------------------------------------------------------------------- /5. ML Applications In Finance/03. Market Analysis And Prediction/4. order_flow_prediction.py: -------------------------------------------------------------------------------- 1 | # Predicting the future order flow (buy/sell) based on existing order books. 2 | -------------------------------------------------------------------------------- /5. ML Applications In Finance/04. Customer Service/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manujajay/machinelearning4finance/7a315e218c211633e28f6ed3de3bdde1de5e09d3/5. ML Applications In Finance/04. Customer Service/.DS_Store -------------------------------------------------------------------------------- /5. ML Applications In Finance/04. Customer Service/1. chatbots.py: -------------------------------------------------------------------------------- 1 | # Automated conversational agents for customer service. -------------------------------------------------------------------------------- /5. ML Applications In Finance/04. Customer Service/2. personal_finance_management.py: -------------------------------------------------------------------------------- 1 | # Recommender systems for personalized financial planning and product recommendations. 2 | 3 | -------------------------------------------------------------------------------- /5. ML Applications In Finance/04. Customer Service/3. customer_segmentation.py: -------------------------------------------------------------------------------- 1 | # Identifying different customer segments to offer tailored products. -------------------------------------------------------------------------------- /5. ML Applications In Finance/05. Compliance and Regulatory/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manujajay/machinelearning4finance/7a315e218c211633e28f6ed3de3bdde1de5e09d3/5. ML Applications In Finance/05. Compliance and Regulatory/.DS_Store -------------------------------------------------------------------------------- /5. ML Applications In Finance/05. Compliance and Regulatory/1. anti_money_laundering_AML.py: -------------------------------------------------------------------------------- 1 | # Detecting potentially illegal actions through transaction monitoring. -------------------------------------------------------------------------------- /5. ML Applications In Finance/05. Compliance and Regulatory/2. 
regulatory_reporting_automation.py: -------------------------------------------------------------------------------- 1 | # Automated systems for generating regulatory reports. -------------------------------------------------------------------------------- /5. ML Applications In Finance/05. Compliance and Regulatory/3. insider_trading_detection.py: -------------------------------------------------------------------------------- 1 | # Identifying suspicious trading patterns using anomaly detection. -------------------------------------------------------------------------------- /5. ML Applications In Finance/06. Real Estate/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manujajay/machinelearning4finance/7a315e218c211633e28f6ed3de3bdde1de5e09d3/5. ML Applications In Finance/06. Real Estate/.DS_Store -------------------------------------------------------------------------------- /5. ML Applications In Finance/06. Real Estate/1. property_valuation.py: -------------------------------------------------------------------------------- 1 | # Automated valuation models for real estate pricing. -------------------------------------------------------------------------------- /5. ML Applications In Finance/06. Real Estate/2. investment_analysis.py: -------------------------------------------------------------------------------- 1 | # Evaluating the potential return on investment for different real estate properties. -------------------------------------------------------------------------------- /5. ML Applications In Finance/07. Supply Chain Finance/supply_chain_finance.py: -------------------------------------------------------------------------------- 1 | # Optimization of supply chain processes and financing. -------------------------------------------------------------------------------- /5. ML Applications In Finance/08. Invoice Management/invoice_management.py: -------------------------------------------------------------------------------- 1 | # Automated processing and management of invoices. -------------------------------------------------------------------------------- /5. ML Applications In Finance/09. Cash Management/cash_management.py: -------------------------------------------------------------------------------- 1 | # Algorithms for optimal cash reserves and investment. -------------------------------------------------------------------------------- /5. ML Applications In Finance/10. Decentralized Finance (DEFI)/1.yield_farming_optimizer.py: -------------------------------------------------------------------------------- 1 | # Write a script that interfaces with various DeFi protocols to find the best yield farming opportunities. 2 | # Use APIs to get real-time data on interest rates and automatically suggest or even reallocate assets. -------------------------------------------------------------------------------- /5. ML Applications In Finance/10. Decentralized Finance (DEFI)/2.smart_contract_auditor.py: -------------------------------------------------------------------------------- 1 | # Build a tool to evaluate the security and efficiency of smart contracts on blockchain platforms like Ethereum. 2 | # Utilize formal verification or static analysis libraries to identify vulnerabilities. -------------------------------------------------------------------------------- /5. ML Applications In Finance/11. 
Environmental Social And Governance Investing (ESG)/1.sustainability_analytics.py: -------------------------------------------------------------------------------- 1 | # Use Natural Language Processing (NLP) to analyze a company's sustainability reports and practices. 2 | # Quantify metrics and present them in a user-friendly dashboard. -------------------------------------------------------------------------------- /5. ML Applications In Finance/11. Environmental Social And Governance Investing (ESG)/2.impact_measurement.py: -------------------------------------------------------------------------------- 1 | # Develop a model to calculate the environmental and social impact of a company based on publicly available data, 2 | # such as emissions reports and community involvement. -------------------------------------------------------------------------------- /5. ML Applications In Finance/12. Behavioural Economics/1.nudges.py: -------------------------------------------------------------------------------- 1 | # Design a Python script that integrates with personal finance apps to provide real-time "nudges" 2 | # to encourage saving or responsible spending based on user behavior. -------------------------------------------------------------------------------- /5. ML Applications In Finance/12. Behavioural Economics/2.investor_sentiment_models.py: -------------------------------------------------------------------------------- 1 | # Use machine learning to analyze sentiment data from social media platforms and news articles to model investor sentiment. 2 | # See how sentiment correlates with stock price movements. -------------------------------------------------------------------------------- /5. ML Applications In Finance/13. Blockchain And Cryptocurrency/cryptocurrency_price_prediction.py: -------------------------------------------------------------------------------- 1 | # Utilize machine learning models like LSTM to predict cryptocurrency prices based on historical data and other indicators. -------------------------------------------------------------------------------- /5. ML Applications In Finance/14. Explainable AI For Finance/model_interpretability.py: -------------------------------------------------------------------------------- 1 | # Create a machine learning model for predicting stock prices or credit scores, and incorporate SHAP (Shapley Additive exPlanations) 2 | # or LIME (Local Interpretable Model-agnostic Explanations) for interpretability. -------------------------------------------------------------------------------- /5. ML Applications In Finance/15. Robotic Process Automation (RPA)/automated_invoice_processing.py: -------------------------------------------------------------------------------- 1 | # Use Optical Character Recognition (OCR) to read invoices and input data into a database or accounting software. -------------------------------------------------------------------------------- /5. ML Applications In Finance/16. Textual And Alternative Data For Finance/news_analytics.py: -------------------------------------------------------------------------------- 1 | # Create a real-time dashboard that uses NLP to analyze financial news for keywords and sentiments that could be trading signals. -------------------------------------------------------------------------------- /5. ML Applications In Finance/16. 
Textual And Alternative Data For Finance/reddit_sentiment_and_market_trends.py: -------------------------------------------------------------------------------- 1 | # Scrape Reddit data to perform sentiment analysis and correlate this with stock or cryptocurrency trends. -------------------------------------------------------------------------------- /5. ML Applications In Finance/17. Fundamental Anaysis/automated_10kand10Q_parser.py: -------------------------------------------------------------------------------- 1 | # Write a script that uses NLP to automatically extract and summarize key financial metrics and textual insights from companies' 2 | # 10-K and 10-Q reports. -------------------------------------------------------------------------------- /5. ML Applications In Finance/17. Fundamental Anaysis/financial_ratios_dashboard.py: -------------------------------------------------------------------------------- 1 | # Develop a dashboard that displays crucial financial ratios, automatically calculated from a company's balance sheet, 2 | # income statement, and cash flow statement. -------------------------------------------------------------------------------- /5. ML Applications In Finance/18. Satellite Image Analysis For Finance/agricultural_yield_prediction.py: -------------------------------------------------------------------------------- 1 | # Analyze satellite images for signs of crop health and size. These metrics can provide insights into future commodity prices. -------------------------------------------------------------------------------- /5. ML Applications In Finance/18. Satellite Image Analysis For Finance/disaster_impact_assessment.py: -------------------------------------------------------------------------------- 1 | # Use satellite images pre and post-natural disasters to assess the impact on infrastructure, agriculture, and local economies, 2 | # which can significantly affect market conditions. -------------------------------------------------------------------------------- /5. ML Applications In Finance/18. Satellite Image Analysis For Finance/natural_resource_exploration.py: -------------------------------------------------------------------------------- 1 | # Analyze satellite images to identify new or depleted natural resources like forests, water bodies, or mineral deposits, 2 | # which can be crucial information for investing in relevant sectors. -------------------------------------------------------------------------------- /5. ML Applications In Finance/18. Satellite Image Analysis For Finance/real_estate_development_monitoring.py: -------------------------------------------------------------------------------- 1 | # Use time-lapsed satellite images to monitor construction and development activity in specific geographical regions. 2 | # This can provide insights into real estate markets and housing prices. -------------------------------------------------------------------------------- /5. ML Applications In Finance/18. Satellite Image Analysis For Finance/retail_traffic_analysis.py: -------------------------------------------------------------------------------- 1 | # Use satellite imagery to count cars in the parking lots of retail stores. This can serve as an alternative data source 2 | # for estimating store popularity, sales, or even economic trends in a given area. -------------------------------------------------------------------------------- /5. ML Applications In Finance/18. 
Satellite Image Analysis For Finance/shipping_activity.py: -------------------------------------------------------------------------------- 1 | # Monitor shipping lanes and ports to gauge activity levels, which could be indicative of economic health 2 | # or trade flow between countries. -------------------------------------------------------------------------------- /5. ML Applications In Finance/18. Satellite Image Analysis For Finance/tech_stack.txt: -------------------------------------------------------------------------------- 1 | 1. Satellite Image Providers: Platforms like Sentinel Hub, NASA EarthData, or even Google Earth Engine can provide the raw satellite images 2 | you'd need. 3 | 2. Image Processing Libraries: OpenCV for basic image processing tasks, or specialized libraries like Rasterio for geospatial data. 4 | 3. Machine Learning Libraries: TensorFlow or PyTorch for any predictive models you might want to develop. 5 | 4. Data Visualization: Libraries like Matplotlib for static charts or Dash/Plotly for interactive dashboards. -------------------------------------------------------------------------------- /5. ML Applications In Finance/19. Venture Capital/cap_table_simulation.py: -------------------------------------------------------------------------------- 1 | # Create a tool that allows for the simulation of various funding rounds and exits, showing how ownership and dilution 2 | # evolve over time. -------------------------------------------------------------------------------- /5. ML Applications In Finance/19. Venture Capital/investment_thesis_generator.py: -------------------------------------------------------------------------------- 1 | # Use NLP to analyze a large corpus of successful investment theses and presentations to generate a template or 2 | # even a first draft for a new investment thesis. -------------------------------------------------------------------------------- /5. ML Applications In Finance/19. Venture Capital/portfolio_monitoring.py: -------------------------------------------------------------------------------- 1 | # Build an application that tracks key performance indicators (KPIs) of a venture capital portfolio, 2 | # such as customer growth rate, churn, and lifetime value. You can use Python libraries like Dash or Streamlit 3 | # for interactive dashboards. -------------------------------------------------------------------------------- /5. ML Applications In Finance/19. Venture Capital/startup_scouting_dashboard.py: -------------------------------------------------------------------------------- 1 | # Create a dashboard that aggregates information from various sources like Crunchbase, Twitter, and academic journals 2 | # to identify promising startups for investment. You could use web scraping and NLP techniques to get this data. -------------------------------------------------------------------------------- /5. ML Applications In Finance/20. Private Equity/buyout_model_automation.py: -------------------------------------------------------------------------------- 1 | # Implement a script that automates the generation of a Leveraged Buyout (LBO) model based on user input or scraped data. 2 | # This can provide quick valuation estimates for potential acquisitions. -------------------------------------------------------------------------------- /5. ML Applications In Finance/20. 
Private Equity/deal_sourcing.py: -------------------------------------------------------------------------------- 1 | # Use machine learning algorithms to predict which companies are most likely to be open to a buyout or investment, 2 | # based on features like financial metrics, management changes, or market conditions. -------------------------------------------------------------------------------- /5. ML Applications In Finance/20. Private Equity/due_dilligence_automation.py: -------------------------------------------------------------------------------- 1 | # Streamline the due diligence process by scraping data and reports related to a target company or industry. 2 | # This could include financial data, regulatory filings, news mentions, and more. -------------------------------------------------------------------------------- /5. ML Applications In Finance/20. Private Equity/esg_integration.py: -------------------------------------------------------------------------------- 1 | # Create a tool that assesses potential and current investments for their Environmental, Social, and Governance (ESG) impact, 2 | # potentially using Natural Language Processing to scan through company reports and news articles for relevant information. -------------------------------------------------------------------------------- /5. ML Applications In Finance/20. Private Equity/post_acquisition_value_creation.py: -------------------------------------------------------------------------------- 1 | # Build models that can forecast the impact of various operational improvements on a company's financials, 2 | # helping to inform post-acquisition strategies. -------------------------------------------------------------------------------- /5. ML Applications In Finance/21. VC & PE General Tools/investor_matching.py: -------------------------------------------------------------------------------- 1 | # An algorithm that matches startups with potential investors based on investor preference, startup sector, stage, and other factors. 2 | 3 | -------------------------------------------------------------------------------- /5. ML Applications In Finance/21. VC & PE General Tools/sector_trend_analysis.py: -------------------------------------------------------------------------------- 1 | # Create an algorithm that uses machine learning to identify emerging sectors or trends based on news articles, 2 | # patent filings, academic papers, or market data. -------------------------------------------------------------------------------- /5. ML Applications In Finance/21. VC & PE General Tools/sentiment_analysis_for_foundersandexecutives.py: -------------------------------------------------------------------------------- 1 | # Use Natural Language Processing to analyze interviews, podcasts, or social media interactions involving company founders 2 | # or executives to gauge leadership quality and public perception. -------------------------------------------------------------------------------- /5. ML Applications In Finance/21. VC & PE General Tools/valuation_multiples_benchmarking.py: -------------------------------------------------------------------------------- 1 | # A tool that collects and analyzes valuation multiples for companies in similar industries or stages 2 | # to provide a comparative benchmark. -------------------------------------------------------------------------------- /5. ML Applications In Finance/22. 
-------------------------------------------------------------------------------- /5. ML Applications In Finance/22. Investment Banking/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manujajay/machinelearning4finance/7a315e218c211633e28f6ed3de3bdde1de5e09d3/5. ML Applications In Finance/22. Investment Banking/.DS_Store
-------------------------------------------------------------------------------- /5. ML Applications In Finance/22. Investment Banking/1. mergers_and_acquisitions_M&A/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manujajay/machinelearning4finance/7a315e218c211633e28f6ed3de3bdde1de5e09d3/5. ML Applications In Finance/22. Investment Banking/1. mergers_and_acquisitions_M&A/.DS_Store
-------------------------------------------------------------------------------- /5. ML Applications In Finance/22. Investment Banking/1. mergers_and_acquisitions_M&A/deal_comparator.py: -------------------------------------------------------------------------------- 1 | # Build a tool to compare past M&A deals based on various metrics like deal size, industry, and financial ratios, 2 | # to gauge the attractiveness of a potential new deal.
-------------------------------------------------------------------------------- /5. ML Applications In Finance/22. Investment Banking/1. mergers_and_acquisitions_M&A/manda_target_screening.py: -------------------------------------------------------------------------------- 1 | # Develop a script that scans financial databases to identify companies that meet specific M&A criteria, 2 | # such as EBITDA margins, revenue growth, or market cap.
-------------------------------------------------------------------------------- /5. ML Applications In Finance/22. Investment Banking/1. mergers_and_acquisitions_M&A/synergy_estimator.py: -------------------------------------------------------------------------------- 1 | # Create a tool that uses historical data to estimate the potential synergies between two merging companies, 2 | # including cost-saving and revenue synergies.
-------------------------------------------------------------------------------- /5. ML Applications In Finance/22. Investment Banking/2. valuation_and_financial_modeling/automated_dcf_model.py: -------------------------------------------------------------------------------- 1 | # Create a tool that can automatically populate a discounted cash flow (DCF) model based on financial statement data, 2 | # offering a quick way to get a valuation estimate.
-------------------------------------------------------------------------------- /5. ML Applications In Finance/22. Investment Banking/2. valuation_and_financial_modeling/capital_structure_optimizer.py: -------------------------------------------------------------------------------- 1 | # Build an algorithm to find the optimal capital structure for a company to minimize its cost of capital.
-------------------------------------------------------------------------------- /5. ML Applications In Finance/22. Investment Banking/2. valuation_and_financial_modeling/comparable_company_analysis.py: -------------------------------------------------------------------------------- 1 | # Implement a script that scrapes market data to perform a comparable company analysis, which is often used for valuation 2 | # in investment banking.
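3 |
4 | # A minimal sketch using yfinance (already a dependency of this repo) rather than
5 | # scraping. Yahoo's `info` payload is not guaranteed to carry every field for
6 | # every ticker, hence the defensive .get() calls; the tickers here are arbitrary.
7 | import yfinance as yf
8 |
9 | def comp_table(tickers):
10 |     rows = []
11 |     for symbol in tickers:
12 |         info = yf.Ticker(symbol).info
13 |         rows.append({
14 |             "ticker": symbol,
15 |             "market_cap": info.get("marketCap"),
16 |             "trailing_pe": info.get("trailingPE"),
17 |             "ev_to_ebitda": info.get("enterpriseToEbitda"),
18 |         })
19 |     return rows
20 |
21 | if __name__ == "__main__":
22 |     for row in comp_table(["MSFT", "ORCL", "SAP"]):
23 |         print(row)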
-------------------------------------------------------------------------------- /5. ML Applications In Finance/22. Investment Banking/2. valuation_and_financial_modeling/wacc_calculator.py: -------------------------------------------------------------------------------- 1 | # A tool to automatically calculate the Weighted Average Cost of Capital (WACC), a key metric in many valuation models.
-------------------------------------------------------------------------------- /5. ML Applications In Finance/22. Investment Banking/3. risk_management/credit_risk_assessment.py: -------------------------------------------------------------------------------- 1 | # Develop a machine learning model that evaluates the credit risk associated with corporate loans or bonds.
-------------------------------------------------------------------------------- /5. ML Applications In Finance/22. Investment Banking/3. risk_management/foreign_exchange_risk_management.py: -------------------------------------------------------------------------------- 1 | # Implement a tool to simulate various hedging strategies for managing foreign exchange risk in cross-border deals.
-------------------------------------------------------------------------------- /5. ML Applications In Finance/22. Investment Banking/4. ipo_process/ipo_valuation_model.py: -------------------------------------------------------------------------------- 1 | # Create a model to estimate the potential valuation of a company considering an initial public offering (IPO).
-------------------------------------------------------------------------------- /5. ML Applications In Finance/22. Investment Banking/4. ipo_process/roadshow_presentation_generator.py: -------------------------------------------------------------------------------- 1 | # Use Natural Language Processing (NLP) to assist in generating the textual content for IPO roadshow presentations 2 | # based on historical data and key business metrics.
-------------------------------------------------------------------------------- /5. ML Applications In Finance/22. Investment Banking/5. client_and_market_analysis/client_relationship_management_CRM.py: -------------------------------------------------------------------------------- 1 | # Create a lightweight CRM tool tailored for investment banking needs, focusing on tracking interactions, deals, and financial metrics.
-------------------------------------------------------------------------------- /5. ML Applications In Finance/22. Investment Banking/5. client_and_market_analysis/market_sentiment_analysis.py: -------------------------------------------------------------------------------- 1 | # Utilize NLP to gauge market sentiment by analyzing news articles, financial reports, and social media posts 2 | # related to specific industries or deals.
-------------------------------------------------------------------------------- /5. ML Applications In Finance/22. Investment Banking/5. client_and_market_analysis/pitchbook_automation.py: -------------------------------------------------------------------------------- 1 | # Develop a tool to semi-automate the creation of pitchbooks, drawing from a database of slides and charts and 2 | # auto-populating them based on the deal at hand.
-------------------------------------------------------------------------------- /5. ML Applications In Finance/23. trading/algorithmic_trading_bot.py: -------------------------------------------------------------------------------- 1 | # Create a bot that uses technical or statistical signals to make trades.
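2 |
3 | # A minimal sketch of one possible signal, not an executable trading system: a
4 | # simple moving-average crossover on daily closes. The ticker, lookback windows
5 | # and one-year history are arbitrary illustrative choices.
6 | import yfinance as yf
7 |
8 | def crossover_signal(ticker="AAPL", fast=20, slow=50):
9 |     close = yf.download(ticker, period="1y")["Close"]
10 |     fast_ma = float(close.rolling(fast).mean().iloc[-1])
11 |     slow_ma = float(close.rolling(slow).mean().iloc[-1])
12 |     # Long bias when the fast average sits above the slow one, otherwise flat
13 |     return "BUY" if fast_ma > slow_ma else "HOLD/SELL"
14 |
15 | if __name__ == "__main__":
16 |     print(crossover_signal())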
-------------------------------------------------------------------------------- /5. ML Applications In Finance/23. trading/market_maker_simulator.py: -------------------------------------------------------------------------------- 1 | # Build a simulation for a market-making algorithm that quotes buy and sell prices for securities.
-------------------------------------------------------------------------------- /5. ML Applications In Finance/23. trading/orderbook_visualizer.py: -------------------------------------------------------------------------------- 1 | # Implement a real-time visualization of an exchange's order book, which can help traders make better decisions.
-------------------------------------------------------------------------------- /5. ML Applications In Finance/24. Portfolio Management/portfolio_optimizer.py: -------------------------------------------------------------------------------- 1 | # Use optimization algorithms to calculate the best allocation of assets in a portfolio given various constraints.
-------------------------------------------------------------------------------- /5. ML Applications In Finance/24. Portfolio Management/risk_parity_portfolio.py: -------------------------------------------------------------------------------- 1 | # Create a tool that builds a portfolio based on the risk contributions of each asset.
-------------------------------------------------------------------------------- /5. ML Applications In Finance/24. Portfolio Management/tax_efficient_portfolio_rebalancer.py: -------------------------------------------------------------------------------- 1 | # Write a script that suggests how to rebalance a portfolio in a tax-efficient manner.
-------------------------------------------------------------------------------- /5. ML Applications In Finance/25. Asset Management/alpha_beta_analysis.py: -------------------------------------------------------------------------------- 1 | # Develop a tool that calculates alpha and beta for a given portfolio and benchmarks it against a market index.
-------------------------------------------------------------------------------- /5. ML Applications In Finance/25. Asset Management/etf_tracker.py: -------------------------------------------------------------------------------- 1 | # Build a tool that tracks the components of various ETFs and their respective weightings.
-------------------------------------------------------------------------------- /5. ML Applications In Finance/25. Asset Management/performance_attribution_tool.py: -------------------------------------------------------------------------------- 1 | # Create a tool that decomposes the returns of a portfolio into various factors like market, sector, and stock selection.
-------------------------------------------------------------------------------- /5. ML Applications In Finance/26. Wealth Management/estate_planning_tool.py: -------------------------------------------------------------------------------- 1 | # Create a tool that helps individuals plan the efficient transfer of their estate, taking into account tax considerations 2 | # and inheritance laws.
-------------------------------------------------------------------------------- /5. ML Applications In Finance/26. Wealth Management/retirement_planner.py: -------------------------------------------------------------------------------- 1 | # Implement a retirement calculator that projects the future value of retirement funds based 2 | # on various investment options and scenarios.
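3 |
4 | # A minimal sketch of the projection logic only; the balance, contribution and
5 | # return figures below are illustrative assumptions, not financial advice.
6 | def project_balance(balance, annual_contribution, annual_return, years):
7 |     """Compound an initial balance with end-of-year contributions."""
8 |     for _ in range(years):
9 |         balance = balance * (1 + annual_return) + annual_contribution
10 |     return balance
11 |
12 | if __name__ == "__main__":
13 |     for rate in (0.03, 0.05, 0.07):  # compare a few return scenarios
14 |         print(f"{rate:.0%} return: {project_balance(50_000, 12_000, rate, 30):,.0f}")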
-------------------------------------------------------------------------------- /5. ML Applications In Finance/26. Wealth Management/robo_adviser_prototype.py: -------------------------------------------------------------------------------- 1 | # Build a simplified robo-advisor that suggests an asset allocation based on a user's risk tolerance and investment goals.
-------------------------------------------------------------------------------- /5. ML Applications In Finance/27. Multi Asset Risk Model/multi_asset_risk_model.py: -------------------------------------------------------------------------------- 1 | # Develop a risk model that calculates the Value-at-Risk (VaR) and 2 | # Conditional Value-at-Risk (CVaR) across multiple asset classes.
-------------------------------------------------------------------------------- /5. ML Applications In Finance/28. Personal Financial Management App/personal_financial_management.py: -------------------------------------------------------------------------------- 1 | # Implement a full-fledged app that helps individuals manage their investments, budgets, and financial goals.
-------------------------------------------------------------------------------- /Brewfile: -------------------------------------------------------------------------------- 1 | tap "heroku/brew" 2 | tap "homebrew/bundle" 3 | tap "homebrew/core" 4 | brew "autoconf" 5 | brew "automake" 6 | brew "carthage" 7 | brew "chruby" 8 | brew "cmatrix" 9 | brew "openssl@1.1" 10 | brew "ruby" 11 | brew "cocoapods" 12 | brew "coreutils" 13 | brew "doctl" 14 | brew "git" 15 | brew "libksba" 16 | brew "libtool" 17 | brew "minetest" 18 | brew "pkg-config" 19 | brew "ruby@3.0" 20 | brew "zlib" 21 | vscode "Dart-Code.dart-code" 22 | vscode "Dart-Code.flutter" 23 | vscode "georgewfraser.vscode-javac" 24 | vscode "janisdd.vscode-edit-csv" 25 | vscode "ms-python.python" 26 | vscode "ms-python.vscode-pylance" 27 | vscode "ms-toolsai.jupyter" 28 | vscode "ms-toolsai.jupyter-keymap" 29 | vscode "ms-toolsai.jupyter-renderers" 30 | vscode "ms-toolsai.vscode-jupyter-cell-tags" 31 | vscode "ms-toolsai.vscode-jupyter-slideshow" 32 | vscode "ms-vscode.cmake-tools" 33 | vscode "ms-vscode.cpptools" 34 | vscode "ms-vscode.cpptools-extension-pack" 35 | vscode "ms-vscode.cpptools-themes" 36 | vscode "msrvida.vscode-sanddance" 37 | vscode "Nash.awesome-flutter-snippets" 38 | vscode "PKief.material-icon-theme" 39 | vscode "qwtel.sqlite-viewer" 40 | vscode "redhat.java" 41 | vscode "tomoki1207.pdf" 42 | vscode "twxs.cmake" 43 | vscode "VisualStudioExptTeam.intellicode-api-usage-examples" 44 | vscode "VisualStudioExptTeam.vscodeintellicode" 45 | vscode "vscjava.vscode-java-debug" 46 | vscode "vscjava.vscode-java-dependency" 47 | vscode "vscjava.vscode-java-pack" 48 | vscode "vscjava.vscode-java-test" 49 | vscode "vscjava.vscode-maven" 50 | vscode "yy0931.vscode-sqlite3-editor" 51 |
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ⚙️🧬🔐 Machine Learning Models in Finance 💹 🚀🛰️ 2 | 3 | This repository contains various machine learning and deep learning models applicable to the financial domain. 4 | 5 | ## Table of Contents 📖 🔬 6 | 7 | - [1. Models Included](#1-models-included-) 8 | - [2. Dependencies](#2-dependencies-) 9 | - [3. Installation](#3-installation-) 10 | - [4. Data Fetching](#4-data-fetching-) 11 | - [5. Data Preprocessing](#5-data-preprocessing-) 12 | - [6. 
Usage](#6-usage-) 13 | - [7. Models Explained](#7-models-explained-) 14 | - [8. Beyond The Models: Real-World Applications in Finance](#8-beyond-the-models-real-world-applications-in-finance-) 15 | - [9. Disclaimer](#9-disclaimer-) 16 | 17 | ## 1. Models Included 🎹 🔮 18 | 19 | The repository consists of the following categories: 20 | 21 | 1. **Supervised Learning Models** 🤝 🗽 22 | - Linear Regression 23 | - Logistic Regression 24 | - Naive Bayes 25 | - Random Forest 26 | 27 | 2. **Unsupervised Learning Models** 👾 🦽 28 | - Clustering (K-means) 29 | - Dimensionality Reduction (PCA) 30 | 31 | 3. **Deep Learning Models** 📡 ⚓️ 32 | - Supervised Deep Learning Models 33 | - Recurrent Neural Networks (LSTM) 34 | - Convolutional Neural Networks (CNN) 35 | - Unsupervised Deep Learning Models 36 | - Autoencoders 37 | - Generative Adversarial Networks (GANs) 38 | 39 | 4. **Reinforcement Learning Models** 🦾 🚥 40 | - Q-Learning 41 | 42 | ## 2. Dependencies 🥗 🔮 43 | 44 | - Python 3.x 45 | - yfinance 46 | - NumPy 47 | - TensorFlow 48 | - Scikit-learn 49 | 50 | ## 3. Installation 🧶 🔧 51 | 52 | To install all dependencies (optionally inside a conda or Python virtual environment), run: 53 | 54 | ```bash 55 | pip install -r requirements.txt 56 | ``` 57 | 58 | To install only the essentials, run: 59 | 60 | ```bash 61 | pip install yfinance numpy tensorflow scikit-learn 62 | ``` 63 | 64 | ## 4. Data Fetching 🥽 65 | Real-world financial data is fetched using the yfinance library. 66 | 67 | ```python 68 | import yfinance as yf 69 | 70 | def fetch_data(ticker, start_date, end_date): 71 | return yf.download(ticker, start=start_date, end=end_date)['Close'].values 72 | ``` 73 | 74 | ## 5. Data Preprocessing 🎼 75 | 76 | Data is preprocessed into sliding windows of past prices (features) and the next price (target), producing the training and testing datasets that are fed into the machine learning models. 77 | 78 | ```python 79 | import numpy as np 80 | 81 | def create_dataset(data, look_back=1): 82 | X, Y = [], [] 83 | for i in range(len(data) - look_back - 1): 84 | a = data[i:(i + look_back)] 85 | X.append(a) 86 | Y.append(data[i + look_back]) 87 | return np.array(X), np.array(Y) 88 | ``` 89 | 90 | ## 6. Usage 🛬 🛫 91 | 92 | Navigate to the respective folder and run the Python script for the model you're interested in. 93 | 94 | ```bash 95 | python script_name.py 96 | ``` 97 | 98 | ## 7. Models Explained 🗺️ 99 | 100 | ### 1. Supervised Learning Models 🏗️ 101 | 102 | #### 1.1 Linear Regression 🎢 103 | Linear Regression fits a linear equation to the data, providing a straightforward and effective baseline for simple predictive tasks. 104 | ![Linear Regression](./1.%20Supervised%20Learning%20Models/linear_regression_summary_with_explanation.png) 105 | 106 | #### 1.2 Logistic Regression 🛟 107 | Logistic Regression is traditionally a classification algorithm; here the continuous price targets are cast to integer labels so it can be benchmarked alongside the regression models. 108 | ![Logistic Regression](./1.%20Supervised%20Learning%20Models/logistic_regression_summary_with_explanation.png) 109 | 110 | #### 1.3 Naive Bayes ⛱️ 111 | Naive Bayes applies Bayes' theorem under a strong feature-independence assumption and is particularly useful for small datasets. 112 | ![Naive Bayes](./1.%20Supervised%20Learning%20Models/naive_bayes_summary_with_explanation.png) 113 | 114 | #### 1.4 Random Forest 🛤️ 115 | Random Forest combines multiple decision trees into a more robust and accurate prediction model. 116 | ![Random Forest](./1.%20Supervised%20Learning%20Models/random_forest_summary_with_explanation.png) 117 | 
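To tie these supervised models to the data helpers above, here is a condensed sketch of the shared train-and-evaluate loop (mirroring the pattern in `main.py`; the ticker, date range, and `look_back` are arbitrary choices, and `fetch_data`/`create_dataset` are the helpers from sections 4 and 5):

```python
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

data = fetch_data('AAPL', '2020-01-01', '2021-01-01')
X, Y = create_dataset(data, look_back=3)
X = X.reshape(len(X), -1)  # flatten each look-back window into one feature row
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

for name, model in [("Linear Regression", LinearRegression()),
                    ("Random Forest", RandomForestRegressor())]:
    model.fit(X_train, Y_train)
    print(name, "MSE:", mean_squared_error(Y_test, model.predict(X_test)))
```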
118 | ### 2. Unsupervised Learning Models 🛸 119 | 120 | #### 2.1 Clustering (K-means) 🏟️ 121 | K-means clustering is used to partition data into groups based on feature similarity. 122 | ![K-means](./2.%20Unsupervised%20Learning%20Models/kmeans_financial_data_with_explanation.png) 123 | 124 | #### 2.2 Dimensionality Reduction (PCA) 🚧 125 | PCA is used to reduce the number of features in a dataset while retaining the most relevant information. 126 | ![PCA](./2.%20Unsupervised%20Learning%20Models/PCA_financial_data_with_full_explanation.png) 127 | 128 | ### 3. Deep Learning Models 🛰️ 129 | 130 | #### 3.1 Supervised Deep Learning Models 🚉 131 | 132 | ##### 3.1.1 Recurrent Neural Networks (RNNs/LSTM) 🌌 133 | Recurrent Neural Networks, particularly those using Long Short-Term Memory (LSTM) units, are highly effective for sequence prediction problems. In finance, they can be used for time-series forecasting tasks such as stock price prediction. 134 | 135 | ![RNNs/LSTM](./3.%20Deep%20Learning%20Models/Apple_Stock_Price_Prediction.png) 136 | 137 | ##### 3.1.2 Convolutional Neural Networks (CNNs) 📱 138 | Convolutional Neural Networks are primarily used in image recognition but can also be applied in finance for pattern recognition in price charts or for processing alternative data types, such as satellite images for agricultural commodity predictions. 139 | 140 | ![CNNs](./3.%20Deep%20Learning%20Models/Financial_News_Sentiment_Analysis.png) 141 | 142 | #### 3.2 Unsupervised Deep Learning Models 🎛️ 143 | 144 | ##### 3.2.1 Autoencoders 📻 145 | Autoencoders are used for anomaly detection in financial data, identifying unusual patterns that do not conform to expected behavior. 146 | 147 | ![Autoencoders](./3.%20Deep%20Learning%20Models/Anomaly_Detection_Using_Autoencoder.png) 148 | 149 | ##### 3.2.2 Generative Adversarial Networks (GANs) ⏲️ 150 | GANs are used for simulating different market conditions, helping in risk assessment for various investment strategies. 151 | 152 | ![GANs](./3.%20Deep%20Learning%20Models/GAN_Financial_Simulation.png) 153 | 154 | ### 4. Reinforcement Learning Models 🔋 155 | 156 | #### 4.1 Q-Learning 🔌 157 | Q-Learning is a model-free reinforcement learning algorithm, used here for stock trading. 158 | ![Q-Learning](./4.%20Reinforcement%20Learning%20Models/Q_Learning_Stock_Trading_YFinance.png) 159 | 
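At its core, the agent maintains a table of state-action values and nudges each entry toward a bootstrapped target. A minimal sketch of just the update rule (the state and action spaces here are placeholder sizes; the full trading loop lives in `q_learning.py` and `main.py`):

```python
import numpy as np

n_states, n_actions = 100, 3           # placeholder sizes (e.g. Buy, Sell, Hold)
alpha, gamma = 0.1, 0.99               # learning rate and discount factor
Q = np.zeros((n_states, n_actions))

def q_update(state, action, reward, next_state):
    # Move Q(s, a) toward the target r + gamma * max_a' Q(s', a')
    target = reward + gamma * np.max(Q[next_state])
    Q[state, action] += alpha * (target - Q[state, action])
```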
160 | ## 8. Beyond The Models: Real-World Applications in Finance 💸 161 | 162 | In addition to the core machine learning models that form the backbone of this repository, we'll explore practical applications that span various dimensions of the financial sector. Below is a snapshot of the project's tree structure that gives you an idea of what these applications are: 163 | 164 | ``` 165 | 5. ml_applications_in_finance 166 | │ ├── risk_management 167 | │ ├── decentralized_finance_(DEFI) 168 | │ ├── environmental_social_and_governance_investing_(ESG) 169 | │ ├── behavioural_economics 170 | │ ├── blockchain_and_cryptocurrency 171 | │ ├── explainable_AI_for_finance 172 | │ ├── robotic_process_automation_(RPA) 173 | │ ├── textual_and_alternative_data_for_finance 174 | │ ├── fundamental_analysis 175 | │ ├── satellite_image_analysis_for_finance 176 | │ ├── venture_capital 177 | │ ├── asset_management 178 | │ ├── private_equity 179 | │ ├── investment_banking 180 | │ ├── trading 181 | │ ├── portfolio_management 182 | │ ├── wealth_management 183 | │ ├── multi_asset_risk_model 184 | │ ├── personal_financial_management_app 185 | │ ├── market_analysis_and_prediction 186 | │ ├── customer_service 187 | │ ├── compliance_and_regulatory 188 | │ ├── real_estate 189 | │ ├── supply_chain_finance 190 | │ ├── invoice_management 191 | │ └── cash_management 192 | ``` 193 | 194 | From risk management to blockchain and cryptocurrency, from venture capital to investment banking, and from asset management to personal financial management, we aim to cover a wide array of use cases. Each of these applications is backed by one or more of the machine learning models described earlier in the repository. 195 | 196 | **Note**: The list of applications is not exhaustive, and the project is a work in progress. While we aim to continually update it with new techniques and applications, certain modules may be added or removed based on their relevance and effectiveness. 197 | 198 | 199 | ## 9. Disclaimer 💳 200 | 201 | The code provided in this repository is for educational and informational purposes only. It is not intended for live trading or as financial advice. Please exercise caution and conduct your own research before making any investment decisions. 
202 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import yfinance as yf 2 | import numpy as np 3 | import tensorflow as tf 4 | from sklearn.linear_model import LinearRegression, LogisticRegression 5 | from sklearn.naive_bayes import GaussianNB 6 | from sklearn.ensemble import RandomForestRegressor 7 | from sklearn.model_selection import train_test_split 8 | from sklearn.metrics import mean_squared_error 9 | 10 | def fetch_data(ticker, start_date, end_date): 11 | return yf.download(ticker, start=start_date, end=end_date)['Close'].values 12 | 13 | def create_dataset(data, look_back=1): 14 | X, Y = [], [] 15 | for i in range(len(data) - look_back - 1): 16 | a = data[i:(i + look_back)] 17 | X.append(a) 18 | Y.append(data[i + look_back]) 19 | return np.array(X), np.array(Y) 20 | 21 | def linear_regression_model(X_train, Y_train, X_test, Y_test): 22 | model = LinearRegression() 23 | model.fit(X_train, Y_train) 24 | pred = model.predict(X_test) 25 | print("Linear Regression MSE:", mean_squared_error(Y_test, pred)) 26 | 27 | def logistic_regression_model(X_train, Y_train, X_test, Y_test): 28 | model = LogisticRegression() 29 | model.fit(X_train, Y_train.astype('int')) 30 | pred = model.predict(X_test) 31 | print("Logistic Regression MSE:", mean_squared_error(Y_test, pred)) 32 | 33 | def naive_bayes_model(X_train, Y_train, X_test, Y_test): 34 | model = GaussianNB() 35 | model.fit(X_train, Y_train.astype('int')) 36 | pred = model.predict(X_test) 37 | print("Naive Bayes MSE:", mean_squared_error(Y_test, pred)) 38 | 39 | def random_forest_model(X_train, Y_train, X_test, Y_test): 40 | model = RandomForestRegressor() 41 | model.fit(X_train, Y_train) 42 | pred = model.predict(X_test) 43 | print("Random Forest MSE:", mean_squared_error(Y_test, pred)) 44 | 45 | def lstm_model(X_train, Y_train, X_test, Y_test, look_back): 46 | model = tf.keras.models.Sequential([ 47 | tf.keras.layers.LSTM(50, input_shape=(look_back, 1)), 48 | tf.keras.layers.Dense(1) 49 | ]) 50 | model.compile(optimizer='adam', loss='mean_squared_error') 51 | model.fit(X_train, Y_train, epochs=2, batch_size=1) 52 | pred = model.predict(X_test) 53 | print("LSTM MSE:", mean_squared_error(Y_test, pred)) 54 | 55 | def rnn_model(X_train, Y_train, X_test, Y_test, look_back): 56 | model = tf.keras.models.Sequential([ 57 | tf.keras.layers.SimpleRNN(50, input_shape=(look_back, 1)), 58 | tf.keras.layers.Dense(1) 59 | ]) 60 | model.compile(optimizer='adam', loss='mean_squared_error') 61 | model.fit(X_train, Y_train, epochs=2, batch_size=1) 62 | pred = model.predict(X_test) 63 | print("RNN MSE:", mean_squared_error(Y_test, pred)) 64 | 65 | class QLearningAgent: 66 | def __init__(self, states, actions, alpha=0.1, gamma=0.99, epsilon=0.1): 67 | self.states = states 68 | self.actions = actions 69 | self.alpha = alpha 70 | self.gamma = gamma 71 | self.epsilon = epsilon 72 | self.q_table = np.zeros((self.states, len(self.actions))) 73 | 74 | def choose_action(self, state): 75 | if np.random.uniform(0, 1) < self.epsilon: 76 | return np.random.choice(self.actions) 77 | else: 78 | return np.argmax(self.q_table[state, :]) 79 | 80 | def learn(self, state, action, reward, next_state): 81 | predict = self.q_table[state, action] 82 | target = reward + self.gamma * np.max(self.q_table[next_state, :]) 83 | self.q_table[state, action] += self.alpha * (target - predict) 84 | 85 | def reinforcement_learning_q_learning(data, look_back=1): 86 
| n_actions = 3 # Buy, Sell, Hold 87 | agent = QLearningAgent(len(data) - look_back, range(n_actions)) 88 | state = 0 89 | for i in range(0, len(data) - look_back - 1): 90 | state = i 91 | action = agent.choose_action(state) 92 | next_state = state + 1 93 | # Here you can define your own reward function based on the action and price change 94 | reward = data[next_state] - data[state] if action == 0 else 0 # Simplified reward function 95 | agent.learn(state, action, reward, next_state) 96 | 97 | # Predict the last action based on Q-values. 98 | # You can extend this part to make multiple predictions. 99 | final_state = len(data) - look_back - 1 100 | final_action = agent.choose_action(final_state) 101 | return final_action 102 | 103 | def main(): 104 | ticker = 'AAPL' 105 | look_back = 1 106 | data = fetch_data(ticker, '2020-01-01', '2021-01-01') 107 | X, Y = create_dataset(data, look_back) 108 | X = np.reshape(X, (X.shape[0], X.shape[1], 1)) 109 | X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42) 110 | X_train_flat = X_train.reshape(X_train.shape[0], look_back) 111 | X_test_flat = X_test.reshape(X_test.shape[0], look_back) 112 | 113 | model_type = input("Enter the model type (linear_regression, logistic_regression, naive_bayes, random_forest, lstm, rnn, reinforcement_learning_q_learning): ") 114 | 115 | if model_type == 'linear_regression': 116 | linear_regression_model(X_train_flat, Y_train, X_test_flat, Y_test) 117 | elif model_type == 'logistic_regression': 118 | logistic_regression_model(X_train_flat, Y_train, X_test_flat, Y_test) 119 | elif model_type == 'naive_bayes': 120 | naive_bayes_model(X_train_flat, Y_train, X_test_flat, Y_test) 121 | elif model_type == 'random_forest': 122 | random_forest_model(X_train_flat, Y_train, X_test_flat, Y_test) 123 | elif model_type == 'lstm': 124 | lstm_model(X_train, Y_train, X_test, Y_test, look_back) 125 | elif model_type == 'rnn': 126 | rnn_model(X_train, Y_train, X_test, Y_test, look_back) 127 | elif model_type == 'reinforcement_learning_q_learning': 128 | final_action = reinforcement_learning_q_learning(data, look_back) 129 | print(f"Final action suggested by Q-Learning: {['Buy', 'Sell', 'Hold'][final_action]}") 130 | 131 | if __name__ == "__main__": 132 | main() 133 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==1.4.0 2 | altair==5.1.1 3 | anyio @ file:///home/conda/feedstock_root/build_artifacts/anyio_1693488585952/work 4 | appdirs==1.4.4 5 | appnope @ file:///home/conda/feedstock_root/build_artifacts/appnope_1649077682618/work 6 | argon2-cffi @ file:///home/conda/feedstock_root/build_artifacts/argon2-cffi_1692818318753/work 7 | argon2-cffi-bindings @ file:///Users/runner/miniforge3/conda-bld/argon2-cffi-bindings_1666850758378/work 8 | arrow @ file:///home/conda/feedstock_root/build_artifacts/arrow_1662382474514/work 9 | asttokens @ file:///home/conda/feedstock_root/build_artifacts/asttokens_1694046349000/work 10 | astunparse==1.6.3 11 | async-lru @ file:///home/conda/feedstock_root/build_artifacts/async-lru_1690563019058/work 12 | attrs @ file:///home/conda/feedstock_root/build_artifacts/attrs_1683424013410/work 13 | Babel @ file:///home/conda/feedstock_root/build_artifacts/babel_1677767029043/work 14 | backcall @ file:///home/conda/feedstock_root/build_artifacts/backcall_1592338393461/work 15 | backports.functools-lru-cache @ 
file:///home/conda/feedstock_root/build_artifacts/backports.functools_lru_cache_1687772187254/work 16 | beautifulsoup4 @ file:///home/conda/feedstock_root/build_artifacts/beautifulsoup4_1680888073205/work 17 | bleach @ file:///home/conda/feedstock_root/build_artifacts/bleach_1674535352125/work 18 | blinker==1.6.2 19 | Brotli @ file:///Users/runner/miniforge3/conda-bld/brotli-split_1693583678882/work 20 | cached-property @ file:///home/conda/feedstock_root/build_artifacts/cached_property_1615209429212/work 21 | cachetools==5.3.1 22 | certifi==2023.7.22 23 | cffi @ file:///Users/runner/miniforge3/conda-bld/cffi_1671179612308/work 24 | charset-normalizer @ file:///home/conda/feedstock_root/build_artifacts/charset-normalizer_1688813409104/work 25 | click==8.1.7 26 | comm @ file:///home/conda/feedstock_root/build_artifacts/comm_1691044910542/work 27 | contourpy==1.1.0 28 | cycler==0.11.0 29 | debugpy @ file:///Users/runner/miniforge3/conda-bld/debugpy_1694118104592/work 30 | decorator @ file:///home/conda/feedstock_root/build_artifacts/decorator_1641555617451/work 31 | defusedxml @ file:///home/conda/feedstock_root/build_artifacts/defusedxml_1615232257335/work 32 | entrypoints @ file:///home/conda/feedstock_root/build_artifacts/entrypoints_1643888246732/work 33 | exceptiongroup @ file:///home/conda/feedstock_root/build_artifacts/exceptiongroup_1692026125334/work 34 | executing @ file:///home/conda/feedstock_root/build_artifacts/executing_1667317341051/work 35 | fastjsonschema @ file:///home/conda/feedstock_root/build_artifacts/python-fastjsonschema_1690055433477/work/dist 36 | flatbuffers==23.5.26 37 | fonttools==4.42.1 38 | fqdn @ file:///home/conda/feedstock_root/build_artifacts/fqdn_1638810296540/work/dist 39 | fredapi==0.5.1 40 | frozendict==2.3.8 41 | gast==0.4.0 42 | gitdb==4.0.10 43 | GitPython==3.1.36 44 | google-auth==2.22.0 45 | google-auth-oauthlib==1.0.0 46 | google-pasta==0.2.0 47 | grpcio==1.58.0 48 | h5py==3.9.0 49 | html5lib==1.1 50 | idna @ file:///home/conda/feedstock_root/build_artifacts/idna_1663625384323/work 51 | importlib-metadata @ file:///home/conda/feedstock_root/build_artifacts/importlib-metadata_1688754491823/work 52 | importlib-resources @ file:///home/conda/feedstock_root/build_artifacts/importlib_resources_1691408075105/work 53 | ipykernel @ file:///Users/runner/miniforge3/conda-bld/ipykernel_1693880377119/work 54 | ipython @ file:///Users/runner/miniforge3/conda-bld/ipython_1693580003080/work 55 | ipython-genutils==0.2.0 56 | ipywidgets @ file:///home/conda/feedstock_root/build_artifacts/ipywidgets_1690877070294/work 57 | isoduration @ file:///home/conda/feedstock_root/build_artifacts/isoduration_1638811571363/work/dist 58 | jax==0.4.14 59 | jedi @ file:///home/conda/feedstock_root/build_artifacts/jedi_1690896916983/work 60 | Jinja2 @ file:///home/conda/feedstock_root/build_artifacts/jinja2_1654302431367/work 61 | joblib==1.3.2 62 | json5 @ file:///home/conda/feedstock_root/build_artifacts/json5_1688248289187/work 63 | jsonpointer==2.0 64 | jsonschema @ file:///home/conda/feedstock_root/build_artifacts/jsonschema-meta_1691761378595/work 65 | jsonschema-specifications @ file:///home/conda/feedstock_root/build_artifacts/jsonschema-specifications_1689701150890/work 66 | jupyter @ file:///Users/runner/miniforge3/conda-bld/jupyter_1670249893813/work 67 | jupyter-console @ file:///home/conda/feedstock_root/build_artifacts/jupyter_console_1678118109161/work 68 | jupyter-events @ file:///home/conda/feedstock_root/build_artifacts/jupyter_events_1691505939576/work 69 | 
jupyter-lsp @ file:///home/conda/feedstock_root/build_artifacts/jupyter-lsp-meta_1685453365113/work/jupyter-lsp 70 | jupyter_client @ file:///home/conda/feedstock_root/build_artifacts/jupyter_client_1693317508789/work 71 | jupyter_core @ file:///Users/runner/miniforge3/conda-bld/jupyter_core_1686775757864/work 72 | jupyter_server @ file:///home/conda/feedstock_root/build_artifacts/jupyter_server_1693487358826/work 73 | jupyter_server_terminals @ file:///home/conda/feedstock_root/build_artifacts/jupyter_server_terminals_1673491454549/work 74 | jupyterlab @ file:///home/conda/feedstock_root/build_artifacts/jupyterlab_1692015883666/work 75 | jupyterlab-pygments @ file:///home/conda/feedstock_root/build_artifacts/jupyterlab_pygments_1649936611996/work 76 | jupyterlab-widgets @ file:///home/conda/feedstock_root/build_artifacts/jupyterlab_widgets_1688489450369/work 77 | jupyterlab_server @ file:///home/conda/feedstock_root/build_artifacts/jupyterlab_server_1690205927615/work 78 | keras==2.12.0 79 | kiwisolver==1.4.5 80 | libclang==16.0.6 81 | lxml==4.9.3 82 | Markdown==3.4.4 83 | markdown-it-py==3.0.0 84 | MarkupSafe @ file:///Users/runner/miniforge3/conda-bld/markupsafe_1685769179270/work 85 | matplotlib==3.7.2 86 | matplotlib-inline @ file:///home/conda/feedstock_root/build_artifacts/matplotlib-inline_1660814786464/work 87 | mdurl==0.1.2 88 | mistune @ file:///home/conda/feedstock_root/build_artifacts/mistune_1692116650819/work 89 | ml-dtypes==0.2.0 90 | multitasking==0.0.11 91 | nbclient @ file:///home/conda/feedstock_root/build_artifacts/nbclient_1684790896106/work 92 | nbconvert @ file:///home/conda/feedstock_root/build_artifacts/nbconvert-meta_1693331710275/work 93 | nbformat @ file:///home/conda/feedstock_root/build_artifacts/nbformat_1690814868471/work 94 | nest-asyncio @ file:///home/conda/feedstock_root/build_artifacts/nest-asyncio_1664684991461/work 95 | notebook @ file:///home/conda/feedstock_root/build_artifacts/notebook_1693410793506/work 96 | notebook_shim @ file:///home/conda/feedstock_root/build_artifacts/notebook-shim_1682360583588/work 97 | numpy==1.23.5 98 | oauthlib==3.2.2 99 | opt-einsum==3.3.0 100 | overrides @ file:///home/conda/feedstock_root/build_artifacts/overrides_1691338815398/work 101 | packaging @ file:///home/conda/feedstock_root/build_artifacts/packaging_1681337016113/work 102 | pandas @ file:///Users/runner/miniforge3/conda-bld/pandas_1693415364816/work 103 | pandas-datareader==0.10.0 104 | pandocfilters @ file:///home/conda/feedstock_root/build_artifacts/pandocfilters_1631603243851/work 105 | parso @ file:///home/conda/feedstock_root/build_artifacts/parso_1638334955874/work 106 | patsy==0.5.3 107 | pexpect @ file:///home/conda/feedstock_root/build_artifacts/pexpect_1667297516076/work 108 | pickleshare @ file:///home/conda/feedstock_root/build_artifacts/pickleshare_1602536217715/work 109 | Pillow==9.5.0 110 | pkgutil_resolve_name @ file:///home/conda/feedstock_root/build_artifacts/pkgutil-resolve-name_1633981968097/work 111 | platformdirs @ file:///home/conda/feedstock_root/build_artifacts/platformdirs_1690813113769/work 112 | ply==3.11 113 | prometheus-client @ file:///home/conda/feedstock_root/build_artifacts/prometheus_client_1689032443210/work 114 | prompt-toolkit @ file:///home/conda/feedstock_root/build_artifacts/prompt-toolkit_1688565951714/work 115 | protobuf==4.24.3 116 | psutil @ file:///Users/runner/miniforge3/conda-bld/psutil_1681775196112/work 117 | ptyprocess @ 
file:///home/conda/feedstock_root/build_artifacts/ptyprocess_1609419310487/work/dist/ptyprocess-0.7.0-py2.py3-none-any.whl 118 | pure-eval @ file:///home/conda/feedstock_root/build_artifacts/pure_eval_1642875951954/work 119 | pyarrow==13.0.0 120 | pyasn1==0.5.0 121 | pyasn1-modules==0.3.0 122 | pycparser @ file:///home/conda/feedstock_root/build_artifacts/pycparser_1636257122734/work 123 | pydeck==0.8.0 124 | Pygments @ file:///home/conda/feedstock_root/build_artifacts/pygments_1691408637400/work 125 | Pympler==1.0.1 126 | pyobjc-core @ file:///Users/runner/miniforge3/conda-bld/pyobjc-core_1686129336286/work 127 | pyobjc-framework-Cocoa @ file:///Users/runner/miniforge3/conda-bld/pyobjc-framework-cocoa_1686136009200/work 128 | pyparsing==3.0.9 129 | PyQt5==5.15.9 130 | PyQt5-sip==12.12.2 131 | PySocks @ file:///home/conda/feedstock_root/build_artifacts/pysocks_1661604839144/work 132 | python-dateutil @ file:///home/conda/feedstock_root/build_artifacts/python-dateutil_1626286286081/work 133 | python-json-logger @ file:///home/conda/feedstock_root/build_artifacts/python-json-logger_1677079630776/work 134 | pytz @ file:///home/conda/feedstock_root/build_artifacts/pytz_1693930252784/work 135 | pytz-deprecation-shim==0.1.0.post0 136 | PyYAML @ file:///Users/runner/miniforge3/conda-bld/pyyaml_1692737410683/work 137 | pyzmq @ file:///Users/runner/miniforge3/conda-bld/pyzmq_1691667591386/work 138 | qtconsole @ file:///home/conda/feedstock_root/build_artifacts/qtconsole-base_1693604303222/work 139 | QtPy @ file:///home/conda/feedstock_root/build_artifacts/qtpy_1693347765905/work 140 | referencing @ file:///home/conda/feedstock_root/build_artifacts/referencing_1691337268233/work 141 | requests @ file:///home/conda/feedstock_root/build_artifacts/requests_1684774241324/work 142 | requests-oauthlib==1.3.1 143 | rfc3339-validator @ file:///home/conda/feedstock_root/build_artifacts/rfc3339-validator_1638811747357/work 144 | rfc3986-validator @ file:///home/conda/feedstock_root/build_artifacts/rfc3986-validator_1598024191506/work 145 | rich==13.5.3 146 | rpds-py @ file:///Users/runner/miniforge3/conda-bld/rpds-py_1693850362535/work 147 | rsa==4.9 148 | scikit-learn==1.3.0 149 | scipy==1.11.2 150 | seaborn==0.12.2 151 | Send2Trash @ file:///Users/runner/miniforge3/conda-bld/send2trash_1682601407921/work 152 | sip @ file:///Users/runner/miniforge3/conda-bld/sip_1690986115414/work 153 | six @ file:///home/conda/feedstock_root/build_artifacts/six_1620240208055/work 154 | smmap==5.0.1 155 | sniffio @ file:///home/conda/feedstock_root/build_artifacts/sniffio_1662051266223/work 156 | soupsieve @ file:///home/conda/feedstock_root/build_artifacts/soupsieve_1693929250441/work 157 | stack-data @ file:///home/conda/feedstock_root/build_artifacts/stack_data_1669632077133/work 158 | statsmodels==0.14.0 159 | streamlit==1.26.0 160 | tenacity==8.2.3 161 | tensorboard==2.12.3 162 | tensorboard-data-server==0.7.1 163 | tensorflow==2.13.0 164 | tensorflow-estimator==2.12.0 165 | tensorflow-io-gcs-filesystem==0.34.0 166 | tensorflow-macos==2.12.0 167 | tensorflow-metal==1.0.0 168 | termcolor==2.3.0 169 | terminado @ file:///Users/runner/miniforge3/conda-bld/terminado_1670254106711/work 170 | threadpoolctl==3.2.0 171 | tinycss2 @ file:///home/conda/feedstock_root/build_artifacts/tinycss2_1666100256010/work 172 | toml @ file:///home/conda/feedstock_root/build_artifacts/toml_1604308577558/work 173 | tomli @ file:///home/conda/feedstock_root/build_artifacts/tomli_1644342247877/work 174 | toolz==0.12.0 175 | tornado @ 
file:///Users/runner/miniforge3/conda-bld/tornado_1692311824797/work 176 | traitlets @ file:///home/conda/feedstock_root/build_artifacts/traitlets_1675110562325/work 177 | typing-utils @ file:///home/conda/feedstock_root/build_artifacts/typing_utils_1622899189314/work 178 | typing_extensions==4.5.0 179 | tzdata @ file:///home/conda/feedstock_root/build_artifacts/python-tzdata_1680081134351/work 180 | tzlocal==4.3.1 181 | uri-template @ file:///home/conda/feedstock_root/build_artifacts/uri-template_1688655812972/work/dist 182 | urllib3 @ file:///home/conda/feedstock_root/build_artifacts/urllib3_1689789803562/work 183 | validators==0.22.0 184 | wcwidth @ file:///home/conda/feedstock_root/build_artifacts/wcwidth_1673864653149/work 185 | webcolors @ file:///home/conda/feedstock_root/build_artifacts/webcolors_1679900785843/work 186 | webencodings==0.5.1 187 | websocket-client @ file:///home/conda/feedstock_root/build_artifacts/websocket-client_1692730992302/work 188 | Werkzeug==2.3.7 189 | widgetsnbextension @ file:///home/conda/feedstock_root/build_artifacts/widgetsnbextension_1688504439014/work 190 | wrapt==1.14.1 191 | yfinance==0.2.28 192 | zipp @ file:///home/conda/feedstock_root/build_artifacts/zipp_1689374466814/work 193 | --------------------------------------------------------------------------------