└── bias and variance .ipynb /bias and variance .ipynb: -------------------------------------------------------------------------------- 1 | {"cells":[{"cell_type":"code","execution_count":1,"id":"4ba00e2a","metadata":{"scrolled":true,"id":"4ba00e2a","executionInfo":{"status":"error","timestamp":1709094098034,"user_tz":-330,"elapsed":3143,"user":{"displayName":"Jeyashri A","userId":"13160687947720331538"}},"outputId":"481a70cc-8f5a-4ed8-b497-17f3d303f55d","colab":{"base_uri":"https://localhost:8080/","height":1000}},"outputs":[{"output_type":"error","ename":"ImportError","evalue":"\n`load_boston` has been removed from scikit-learn since version 1.2.\n\nThe Boston housing prices dataset has an ethical problem: as\ninvestigated in [1], the authors of this dataset engineered a\nnon-invertible variable \"B\" assuming that racial self-segregation had a\npositive impact on house prices [2]. Furthermore the goal of the\nresearch that led to the creation of this dataset was to study the\nimpact of air quality but it did not give adequate demonstration of the\nvalidity of this assumption.\n\nThe scikit-learn maintainers therefore strongly discourage the use of\nthis dataset unless the purpose of the code is to study and educate\nabout ethical issues in data science and machine learning.\n\nIn this special case, you can fetch the dataset from the original\nsource::\n\n import pandas as pd\n import numpy as np\n\n data_url = \"http://lib.stat.cmu.edu/datasets/boston\"\n raw_df = pd.read_csv(data_url, sep=\"\\s+\", skiprows=22, header=None)\n data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])\n target = raw_df.values[1::2, 2]\n\nAlternative datasets include the California housing dataset and the\nAmes housing dataset. You can load the datasets as follows::\n\n from sklearn.datasets import fetch_california_housing\n housing = fetch_california_housing()\n\nfor the California housing dataset and::\n\n from sklearn.datasets import fetch_openml\n housing = fetch_openml(name=\"house_prices\", as_frame=True)\n\nfor the Ames housing dataset.\n\n[1] M Carlisle.\n\"Racist data destruction?\"\n\n\n[2] Harrison Jr, David, and Daniel L. Rubinfeld.\n\"Hedonic housing prices and the demand for clean air.\"\nJournal of environmental economics and management 5.1 (1978): 81-102.\n\n","traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)","\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mnumpy\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0msklearn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlinear_model\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mLinearRegression\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mLasso\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0msklearn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdatasets\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mload_boston\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0msklearn\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mmetrics\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mload_boston\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mreturn_X_y\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/sklearn/datasets/__init__.py\u001b[0m in \u001b[0;36m__getattr__\u001b[0;34m(name)\u001b[0m\n\u001b[1;32m 154\u001b[0m \"\"\"\n\u001b[1;32m 155\u001b[0m )\n\u001b[0;32m--> 156\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mImportError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmsg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 157\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 158\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mglobals\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;31mImportError\u001b[0m: \n`load_boston` has been removed from scikit-learn since version 1.2.\n\nThe Boston housing prices dataset has an ethical problem: as\ninvestigated in [1], the authors of this dataset engineered a\nnon-invertible variable \"B\" assuming that racial self-segregation had a\npositive impact on house prices [2]. Furthermore the goal of the\nresearch that led to the creation of this dataset was to study the\nimpact of air quality but it did not give adequate demonstration of the\nvalidity of this assumption.\n\nThe scikit-learn maintainers therefore strongly discourage the use of\nthis dataset unless the purpose of the code is to study and educate\nabout ethical issues in data science and machine learning.\n\nIn this special case, you can fetch the dataset from the original\nsource::\n\n import pandas as pd\n import numpy as np\n\n data_url = \"http://lib.stat.cmu.edu/datasets/boston\"\n raw_df = pd.read_csv(data_url, sep=\"\\s+\", skiprows=22, header=None)\n data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])\n target = raw_df.values[1::2, 2]\n\nAlternative datasets include the California housing dataset and the\nAmes housing dataset. You can load the datasets as follows::\n\n from sklearn.datasets import fetch_california_housing\n housing = fetch_california_housing()\n\nfor the California housing dataset and::\n\n from sklearn.datasets import fetch_openml\n housing = fetch_openml(name=\"house_prices\", as_frame=True)\n\nfor the Ames housing dataset.\n\n[1] M Carlisle.\n\"Racist data destruction?\"\n\n\n[2] Harrison Jr, David, and Daniel L. Rubinfeld.\n\"Hedonic housing prices and the demand for clean air.\"\nJournal of environmental economics and management 5.1 (1978): 81-102.\n\n","","\u001b[0;31m---------------------------------------------------------------------------\u001b[0;32m\nNOTE: If your import is failing due to a missing package, you can\nmanually install dependencies using either !pip or !apt.\n\nTo view examples of installing some common dependencies, click the\n\"Open Examples\" button below.\n\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n"],"errorDetails":{"actions":[{"action":"open_url","actionText":"Open Examples","url":"/notebooks/snippets/importing_libraries.ipynb"}]}}],"source":["\n","from mlxtend.evaluate import bias_variance_decomp\n","from sklearn.model_selection import train_test_split\n","import pandas as pd\n","import numpy as np\n","from sklearn.linear_model import LinearRegression, Lasso\n","from sklearn.datasets import load_boston\n","from sklearn import metrics\n","X, y = load_boston(return_X_y=True)\n","X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=1)\n","model_lr = LinearRegression()\n","mse,bias, var = bias_variance_decomp(model_lr, X_train, y_train, X_test, y_test, loss='mse', num_rounds=200, random_seed=123)\n","y_pred=model_lr.predict(X_test)\n","print('MSE from bias_variance lib [avg expected loss]: %.3f' % mse)\n","print('Avg Bias: %.3f' % bias)\n","print('Avg Variance: %.3f' % var)\n"]}],"metadata":{"kernelspec":{"display_name":"Python 3 (ipykernel)","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.9.12"},"colab":{"provenance":[]}},"nbformat":4,"nbformat_minor":5} --------------------------------------------------------------------------------