{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "\"Open"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "vgSP2IAvTSio"
      },
      "outputs": [],
      "source": [
        "import pandas as pd\n",
        "from sklearn.model_selection import train_test_split\n",
        "from sklearn.preprocessing import StandardScaler # For feature scaling\n",
        "from sklearn.linear_model import LinearRegression # Replace with your chosen model"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Load data\n",
        "\n",
        "The raw CSV is read once, before any cell that uses `data`, so the notebook survives **Restart & Run All**."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "8sPWK1hwZULW"
      },
      "outputs": [],
      "source": [
        "# The path points at /content (this notebook carries Colab metadata);\n",
        "# the CSV must already exist there or this cell raises FileNotFoundError.\n",
        "data = pd.read_csv('/content/MarketData2.csv')"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Explore"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "xNbXMWtSXLkL"
      },
      "outputs": [],
      "source": [
        "# Access a specific column (e.g., Close price)\n",
        "close_prices = data['Close']\n",
        "\n",
        "# Calculate statistics on a column (e.g., average Close price)\n",
        "average_close = close_prices.mean()\n",
        "\n",
        "# Filter data based on a condition (e.g., Close price higher than 100)\n",
        "high_close_days = data[data['Close'] > 100]"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Derived columns\n",
        "\n",
        "The edits below produce a new frame, `data_prepared`, and leave `data` untouched. The feature list further down still refers to the original `Low` column, so `data` must not be renamed in place."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "FJXkayR_XT_f"
      },
      "outputs": [],
      "source": [
        "# Build a derived frame instead of mutating `data` in place, so the raw\n",
        "# columns ('Low', 'Adj Close') stay available to later cells and this\n",
        "# cell gives the same result every time it is re-run.\n",
        "data_prepared = (\n",
        "    data\n",
        "    # Add a new column (e.g., Daily Change)\n",
        "    .assign(**{'Daily Change': data['Close'] - data['Open']})\n",
        "    # Drop a column (e.g., Adj Close); raises KeyError if the column is absent\n",
        "    .drop(columns='Adj Close')\n",
        "    # Rename a column (e.g., Low to Min Price)\n",
        "    .rename(columns={'Low': 'Min Price'})\n",
        ")"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Features, target and train/test split"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "EcVLTIYYZbmI"
      },
      "outputs": [],
      "source": [
        "# Define the target variable (e.g., predict future closing price)\n",
        "target_variable = 'Close'"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "I74ACYImZgZb"
      },
      "outputs": [],
      "source": [
        "# Column names as they appear in the raw `data` frame ('Low' is not renamed there).\n",
        "features = ['Open', 'High', 'Low', 'Volume']"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "background_save": true
        },
        "id": "Su_Cw0k2ZkiP"
      },
      "outputs": [],
      "source": [
        "# Feature matrix and target vector are taken from the raw frame.\n",
        "X = data[features]\n",
        "y = data[target_variable]"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "FXwhV8HfZp_u"
      },
      "outputs": [],
      "source": [
        "# Hold out 20% of the rows for testing; the fixed random_state makes the split repeatable.\n",
        "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)"
      ]
    }
  ],
  "metadata": {
    "colab": {
      "provenance": [],
      "authorship_tag": "ABX9TyNt1GTt78FG3n2oICFBtDyO",
      "include_colab_link": true
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}