├── access_data.ipynb
├── chatgpt_ds.ipynb
├── churn_gpt3_ds.ipynb
├── customer_segmentation_chatgpt_ds.ipynb
├── demand_gpt3_ds.ipynb
├── helper_class_ml.ipynb
├── hyperparameter_tuning_tutorial.ipynb
├── imputer.ipynb
├── list_comp_generators.ipynb
├── p_values_ml.ipynb
├── portfolio_optimization.ipynb
├── predictive_modeling_attrition.ipynb
├── richest_people_eda.ipynb
├── spotify_churn_sythentic.csv
├── stable_diffusion
│   └── hello.py
├── synthetic_best_buy_data.csv
├── tabgan_experiements.ipynb
└── time_series_oop.ipynb


/chatgpt_ds.ipynb:
--------------------------------------------------------------------------------
1 | {"cells":[{"cell_type":"code","source":"%pip install openai\n%pip install catboost","metadata":{"tags":[],"cell_id":"0adf8006093a451895d31f046cb00961","allow_embed":"code_output","source_hash":"8c175969","execution_start":1673363490242,"execution_millis":11359,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[{"name":"stdout","text":"Collecting openai\n  Downloading openai-0.26.0.tar.gz (54 kB)\n\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m54.7/54.7 KB\u001b[0m \u001b[31m15.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25h  Installing build dependencies ... \u001b[?25ldone\n\u001b[?25h  Getting requirements to build wheel ... \u001b[?25ldone\n\u001b[?25h  Preparing metadata (pyproject.toml) ... \u001b[?25ldone\n\u001b[?25hRequirement already satisfied: requests>=2.20 in /shared-libs/python3.9/py/lib/python3.9/site-packages (from openai) (2.28.1)\nRequirement already satisfied: aiohttp in /shared-libs/python3.9/py-core/lib/python3.9/site-packages (from openai) (3.8.3)\nRequirement already satisfied: tqdm in /shared-libs/python3.9/py/lib/python3.9/site-packages (from openai) (4.64.1)\nRequirement already satisfied: idna<4,>=2.5 in /shared-libs/python3.9/py-core/lib/python3.9/site-packages (from requests>=2.20->openai) (3.4)\nRequirement already satisfied: certifi>=2017.4.17 in /shared-libs/python3.9/py/lib/python3.9/site-packages (from requests>=2.20->openai) (2022.9.24)\nRequirement already satisfied: urllib3<1.27,>=1.21.1 in /shared-libs/python3.9/py/lib/python3.9/site-packages (from requests>=2.20->openai) (1.26.12)\nRequirement already satisfied: charset-normalizer<3,>=2 in /shared-libs/python3.9/py-core/lib/python3.9/site-packages (from requests>=2.20->openai) (2.1.1)\nRequirement already satisfied: frozenlist>=1.1.1 in /shared-libs/python3.9/py-core/lib/python3.9/site-packages (from aiohttp->openai) (1.3.1)\nRequirement already satisfied: async-timeout<5.0,>=4.0.0a3 in 
/shared-libs/python3.9/py-core/lib/python3.9/site-packages (from aiohttp->openai) (4.0.2)\nRequirement already satisfied: aiosignal>=1.1.2 in /shared-libs/python3.9/py-core/lib/python3.9/site-packages (from aiohttp->openai) (1.2.0)\nRequirement already satisfied: yarl<2.0,>=1.0 in /shared-libs/python3.9/py-core/lib/python3.9/site-packages (from aiohttp->openai) (1.8.1)\nRequirement already satisfied: attrs>=17.3.0 in /shared-libs/python3.9/py-core/lib/python3.9/site-packages (from aiohttp->openai) (22.1.0)\nRequirement already satisfied: multidict<7.0,>=4.5 in /shared-libs/python3.9/py-core/lib/python3.9/site-packages (from aiohttp->openai) (6.0.2)\nBuilding wheels for collected packages: openai\n  Building wheel for openai (pyproject.toml) ... \u001b[?25ldone\n\u001b[?25h  Created wheel for openai: filename=openai-0.26.0-py3-none-any.whl size=66833 sha256=0521d4ca265f85c6ae8e6844f2a80bc750427b6b74cb99a5aba252e6d57d6259\n  Stored in directory: /root/.cache/pip/wheels/50/85/93/3c090d89fb182ca03a781eff1f7195ec4a893dbeea5ae964dc\nSuccessfully built openai\nInstalling collected packages: openai\nSuccessfully installed openai-0.26.0\n\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. 
It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n\u001b[0m\u001b[33mWARNING: You are using pip version 22.0.4; however, version 22.3.1 is available.\nYou should consider upgrading via the '/usr/local/bin/python -m pip install --upgrade pip' command.\u001b[0m\u001b[33m\n\u001b[0mNote: you may need to restart the kernel to use updated packages.\nCollecting catboost\n  Downloading catboost-1.1.1-cp39-none-manylinux1_x86_64.whl (76.6 MB)\n\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m76.6/76.6 MB\u001b[0m \u001b[31m36.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hRequirement already satisfied: plotly in /shared-libs/python3.9/py/lib/python3.9/site-packages (from catboost) (5.10.0)\nRequirement already satisfied: scipy in /shared-libs/python3.9/py/lib/python3.9/site-packages (from catboost) (1.9.3)\nRequirement already satisfied: matplotlib in /shared-libs/python3.9/py/lib/python3.9/site-packages (from catboost) (3.6.0)\nCollecting graphviz\n  Downloading graphviz-0.20.1-py3-none-any.whl (47 kB)\n\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m47.0/47.0 KB\u001b[0m \u001b[31m11.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hRequirement already satisfied: pandas>=0.24.0 in /shared-libs/python3.9/py/lib/python3.9/site-packages (from catboost) (1.2.5)\nRequirement already satisfied: numpy>=1.16.0 in /shared-libs/python3.9/py/lib/python3.9/site-packages (from catboost) (1.23.4)\nRequirement already satisfied: six in /shared-libs/python3.9/py-core/lib/python3.9/site-packages (from catboost) (1.16.0)\nRequirement already satisfied: python-dateutil>=2.7.3 in /shared-libs/python3.9/py-core/lib/python3.9/site-packages (from pandas>=0.24.0->catboost) (2.8.2)\nRequirement already satisfied: pytz>=2017.3 in /shared-libs/python3.9/py/lib/python3.9/site-packages (from pandas>=0.24.0->catboost) (2022.5)\nRequirement already 
satisfied: cycler>=0.10 in /shared-libs/python3.9/py/lib/python3.9/site-packages (from matplotlib->catboost) (0.11.0)\nRequirement already satisfied: fonttools>=4.22.0 in /shared-libs/python3.9/py/lib/python3.9/site-packages (from matplotlib->catboost) (4.37.4)\nRequirement already satisfied: pyparsing>=2.2.1 in /shared-libs/python3.9/py-core/lib/python3.9/site-packages (from matplotlib->catboost) (3.0.9)\nRequirement already satisfied: kiwisolver>=1.0.1 in /shared-libs/python3.9/py/lib/python3.9/site-packages (from matplotlib->catboost) (1.4.4)\nRequirement already satisfied: pillow>=6.2.0 in /shared-libs/python3.9/py/lib/python3.9/site-packages (from matplotlib->catboost) (9.2.0)\nRequirement already satisfied: contourpy>=1.0.1 in /shared-libs/python3.9/py/lib/python3.9/site-packages (from matplotlib->catboost) (1.0.5)\nRequirement already satisfied: packaging>=20.0 in /shared-libs/python3.9/py-core/lib/python3.9/site-packages (from matplotlib->catboost) (21.3)\nRequirement already satisfied: tenacity>=6.2.0 in /shared-libs/python3.9/py/lib/python3.9/site-packages (from plotly->catboost) (8.1.0)\nInstalling collected packages: graphviz, catboost\nSuccessfully installed catboost-1.1.1 graphviz-0.20.1\n\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. 
It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n\u001b[0m\u001b[33mWARNING: You are using pip version 22.0.4; however, version 22.3.1 is available.\nYou should consider upgrading via the '/usr/local/bin/python -m pip install --upgrade pip' command.\u001b[0m\u001b[33m\n\u001b[0mNote: you may need to restart the kernel to use updated packages.\n","output_type":"stream"}],"execution_count":1},{"cell_type":"code","source":"import openai\nopenai.api_key = \"\"\n","metadata":{"tags":[],"cell_id":"f8dbdb5478f749439db51e9911865499","allow_embed":"code_output","source_hash":"df671972","execution_start":1673365907749,"execution_millis":1,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[],"execution_count":50},{"cell_type":"code","source":"completion = openai.Completion.create(engine=\"text-davinci-003\", prompt=\"What is the pandas library?\", max_tokens=1000)\nprint(completion.choices[0]['text'])","metadata":{"tags":[],"cell_id":"a180f55a9c91496397774bef72d7973f","allow_embed":"code_output","source_hash":"76b98026","execution_start":1673365727131,"execution_millis":2539,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[{"name":"stdout","text":"\n\nPandas is an open source software library written in Python for data manipulation and analysis. Pandas is widely used in data science, machine learning and many other fields. 
It provides high-level data structures and tools for handling and manipulating data, including data frames, series, plotting tools and more.\n","output_type":"stream"}],"execution_count":48},{"cell_type":"code","source":"completion = openai.Completion.create(engine=\"text-davinci-003\", prompt=\"what are some common Pandas use cases?\", max_tokens=240)\nprint(completion.choices[0]['text'])","metadata":{"tags":[],"cell_id":"2760765f78ac4f3b90287f6512348607","allow_embed":"code_output","source_hash":"e0759cfc","execution_start":1673363502724,"execution_millis":1964,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[{"name":"stdout","text":"\n\n1. Data Cleaning and Transformation\n2. Data Analysis and Exploration\n3. Time Series Analysis\n4. Data Visualization\n5. Statistical Modeling\n6. Predictive Modeling\n7. Machine Learning\n8. Web Scraping\n","output_type":"stream"}],"execution_count":3},{"cell_type":"code","source":"#what are the most common deep learning libraries?\ncompletion = openai.Completion.create(engine=\"text-davinci-003\", prompt=\"what are the most common deep learning libraries?\", max_tokens=240)\nprint(completion.choices[0]['text'])","metadata":{"tags":[],"cell_id":"65cf1dd02cc7408bbcc2ee9fb0e64cb5","allow_embed":"code_output","source_hash":"1d073d9f","execution_start":1673363504691,"execution_millis":2997,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[{"name":"stdout","text":"\n\n1. TensorFlow \n2. PyTorch \n3. Keras \n4. Caffe \n5. CNTK \n6. MXNet \n7. Theano \n8. Deeplearning4j \n9. Gensim \n10. 
LUNA\n","output_type":"stream"}],"execution_count":4},{"cell_type":"code","source":"completion = openai.Completion.create(engine=\"text-davinci-003\", prompt=\"What is a deep neural network?\", max_tokens=1000)\nprint(completion.choices[0]['text'])","metadata":{"tags":[],"cell_id":"13c87c1765004e44bcf533bad839146a","allow_embed":"code_output","source_hash":"fc1559af","execution_start":1673363507691,"execution_millis":2509,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[{"name":"stdout","text":"\n\nA deep neural network (DNN) is a type of artificial neural network (ANN) with multiple layers of neurons between the input and output layers. DNNs are designed to learn complex non-linear relationships from data, and have been successfully applied in a wide range of areas such as image recognition, natural language processing, and financial forecasting.\n","output_type":"stream"}],"execution_count":5},{"cell_type":"code","source":"completion = openai.Completion.create(engine=\"text-davinci-003\", prompt=\"give some ideas on feature transformations that can improve model performance\", max_tokens=1000)\nprint(completion.choices[0]['text'])","metadata":{"tags":[],"cell_id":"2413189d53874e22b3c645a0b1a09aee","allow_embed":"code_output","source_hash":"1cb10d06","execution_start":1673363510201,"execution_millis":5732,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[{"name":"stdout","text":"\n\n1. Standardization/Normalization: A common feature transform used to ensure features are on the same scale, standardizing or normalizing variables can help limit the severity of outliers and improve the overall model performance.\n\n2. Feature Binning: Binning is a process of transforming numerical variables into categorical ones. This can be useful when working with variables that have too many levels and can have a significant effect on the model performance.\n\n3. 
Polynomial Expansion: When a nonlinear relationship is expected between features and the output variable, a polynomial expansion feature transformation can help improve model performance.\n\n4. Feature Selection: Removing redundant or irrelevant features from the dataset can help improve the model performance as these features may lead to overfitting.\n\n5. Ensemble: Combining different types of models (or different versions of the same model) can often improve performance due to their combined capabilities.\n","output_type":"stream"}],"execution_count":6},{"cell_type":"code","source":"completion = openai.Completion.create(engine=\"text-davinci-003\", prompt=\"Write example python code that performs data standardization\", max_tokens=1000)\nprint(completion.choices[0]['text'])","metadata":{"tags":[],"cell_id":"af12900045b2478b93ab9736b64e882b","allow_embed":"code_output","source_hash":"b15dbb8b","execution_start":1673363515937,"execution_millis":4579,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[{"name":"stdout","text":"\n\n# Import the necessary libraries\nimport numpy as np\n\n# Define the data \ndata = np.array([[-3, 9, 0, 8],\n                 [ 4, 6, 5, 12],\n                 [20, 2, 3, 15]])\n\n# Calculate mean and standard deviation\nmean = np.mean(data, axis=0)\nstd = np.std(data, axis=0)\n\n# Perform data standardization\nstandardized_data = (data - mean) / std\n\n# Print the results\nprint(standardized_data)\n","output_type":"stream"}],"execution_count":7},{"cell_type":"code","source":"# Import the necessary libraries\nimport numpy as np\n\n# Define the data \ndata = np.array([[-3, 9, 0, 8],\n                 [ 4, 6, 5, 12],\n                 [20, 2, 3, 15]])\n\n# Calculate mean and standard deviation\nmean = np.mean(data, axis=0)\nstd = np.std(data, axis=0)\n\n# Perform data standardization\nstandardized_data = (data - mean) / std\n\n# Print the 
results\nprint(standardized_data)","metadata":{"tags":[],"cell_id":"32e7ec7fc04b4a29962cb69a6e1a25af","allow_embed":"code_output","source_hash":"2f0bcad0","execution_start":1673363633656,"execution_millis":4,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[{"name":"stdout","text":"[[-1.03881504  1.16247639 -1.29777137 -1.27872403]\n [-0.31164451  0.11624764  1.13554995  0.11624764]\n [ 1.35045955 -1.27872403  0.16222142  1.16247639]]\n","output_type":"stream"}],"execution_count":12},{"cell_type":"code","source":"completion = openai.Completion.create(engine=\"text-davinci-003\", prompt=\"Write example python code that performs data normalization on fake data\", max_tokens=1000)\nprint(completion.choices[0]['text'])","metadata":{"tags":[],"cell_id":"a924a7e8372a4189b039844383357660","allow_embed":"code_output","source_hash":"a87e1c85","execution_start":1673363909417,"execution_millis":4670,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[{"name":"stdout","text":"\n\n# Normalizing data will rescale features in the range [0,1]\n\ndata = [3, 7, 10, 13] # Sample data\n\n# Calculate the maximum and minimum of the data\nmax_data = max(data)\nmin_data = min(data)\n\n# Normalize the data\nnormalized_data = [(x-min_data)/(max_data-min_data) for x in data]\n\n# Print first value to check \nprint(normalized_data[0]) # Prints 0.2\n","output_type":"stream"}],"execution_count":19},{"cell_type":"code","source":"\n# Normalizing data will rescale features in the range [0,1]\n\ndata = [3, 7, 10, 13] # Sample data\n\n# Calculate the maximum and minimum of the data\nmax_data = max(data)\nmin_data = min(data)\n\n# Normalize the data\nnormalized_data = [(x-min_data)/(max_data-min_data) for x in data]\n\n# Print all normalized values to check \nprint(normalized_data) 
\n","metadata":{"tags":[],"cell_id":"4dcd582d6443441b97b4652d53795756","allow_embed":"code_output","source_hash":"8d36fcfa","execution_start":1673363932199,"execution_millis":3,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[{"name":"stdout","text":"[0.0, 0.4, 0.7, 1.0]\n","output_type":"stream"}],"execution_count":null},{"cell_type":"code","source":"completion = openai.Completion.create(engine=\"text-davinci-003\", prompt=\"How do i select a time series model?\", max_tokens=1000)\nprint(completion.choices[0]['text'])","metadata":{"tags":[],"cell_id":"3906c30630854d749bfee27b410c8b19","allow_embed":"code_output","source_hash":"431e6b6e","execution_start":1673363723013,"execution_millis":4807,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[{"name":"stdout","text":"\n\nThe selection of a time series model depends on the type of data that is being analyzed. If there are long-term trends or cycles in the data, then an autoregressive integrated moving average (ARIMA) model or a exponential smoothing (ETS) model may be the best option. If the data are seasonal or there are outliers present, then a Holt-Winters model may be a better choice. If the data do not contain any long-term trends or seasonality, a Box-Jenkins model may be suitable. 
Regardless of the model selected, it is important to consider the type of data that is being modeled when selecting a time series model.\n","output_type":"stream"}],"execution_count":17},{"cell_type":"code","source":"completion = openai.Completion.create(engine=\"text-davinci-003\", prompt=\"When to use ARIMA vs FB prophet?\", max_tokens=1000)\nprint(completion.choices[0]['text'])","metadata":{"tags":[],"cell_id":"dd459a0b0c2042f68abbe1c77c8cd789","allow_embed":"code_output","source_hash":"aa5a6656","execution_start":1673363744423,"execution_millis":3699,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[{"name":"stdout","text":"\n\nARIMA and FB Prophet are both used for time series forecasting. ARIMA is a linear model and is better for predicting long-term trends with stationary data. FB Prophet is a non-linear model and is better for predicting short-term trends with non-stationary data. So, you would use ARIMA when you are mainly concerned with predicting long-term trends, and you would use FB Prophet when you are mainly concerned with predicting short-term trends.\n","output_type":"stream"}],"execution_count":18},{"cell_type":"code","source":"completion = openai.Completion.create(engine=\"text-davinci-003\", prompt=\"Write example python code that generates synthetic healthcare readmission data stored in a dataframe\", max_tokens=1000)\nprint(completion.choices[0]['text'])","metadata":{"tags":[],"cell_id":"f997e7a71c57471c81dba210622b0ffe","allow_embed":"code_output","source_hash":"7160f974","execution_start":1673364364069,"execution_millis":5794,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[{"name":"stdout","text":"\n\nimport pandas as pd\nimport numpy as np\n\n# Create Dataframe \n\ndf = pd.DataFrame(columns=['Patient_ID', 'Age', 'Admission_Type', 'Readmitted'])\n\n# Generate Data\n\nnp.random.seed(0)\nfor i in range(10):\n  admission_type = np.random.choice(['Urgent', 'Scheduled', 'Emergency'])\n  
patient_age = np.random.randint(18, 80)\n  readmission = np.random.choice([0, 1])\n  df.loc[i] = [i+1, patient_age, admission_type, readmission]\n\n# Print Dataframe to Console\n\nprint(df)\n","output_type":"stream"}],"execution_count":22},{"cell_type":"code","source":"import pandas as pd\nimport numpy as np\n\n# Create Dataframe \n\ndf = pd.DataFrame(columns=['Patient_ID', 'Age', 'Admission_Type', 'Readmitted'])\n\n# Generate Data\n\nnp.random.seed(0)\nfor i in range(10):\n  admission_type = np.random.choice(['Urgent', 'Scheduled', 'Emergency'])\n  patient_age = np.random.randint(18, 80)\n  readmission = np.random.choice([0, 1])\n  df.loc[i] = [i+1, patient_age, admission_type, readmission]\n\n# Print Dataframe to Console\n\ndf\n","metadata":{"tags":[],"cell_id":"3b5d1a15336a44efbc67b9734d8ff466","allow_embed":"code_output","source_hash":"1f170973","execution_start":1673364389122,"execution_millis":22,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","execution_count":23,"data":{"application/vnd.deepnote.dataframe.v3+json":{"column_count":4,"row_count":10,"columns":[{"name":"Patient_ID","dtype":"object","stats":{"unique_count":10,"nan_count":0,"categories":[{"name":"1","count":1},{"name":"2","count":1},{"name":"8 others","count":8}]}},{"name":"Age","dtype":"object","stats":{"unique_count":9,"nan_count":0,"categories":[{"name":"42","count":2},{"name":"65","count":1},{"name":"7 
others","count":7}]}},{"name":"Admission_Type","dtype":"object","stats":{"unique_count":3,"nan_count":0,"categories":[{"name":"Urgent","count":4},{"name":"Emergency","count":4},{"name":"Scheduled","count":2}]}},{"name":"Readmitted","dtype":"object","stats":{"unique_count":2,"nan_count":0,"categories":[{"name":"1","count":8},{"name":"0","count":2}]}},{"name":"_deepnote_index_column","dtype":"int64"}],"rows":[{"Patient_ID":"1","Age":"65","Admission_Type":"Urgent","Readmitted":"1","_deepnote_index_column":"0"},{"Patient_ID":"2","Age":"21","Admission_Type":"Urgent","Readmitted":"1","_deepnote_index_column":"1"},{"Patient_ID":"3","Age":"37","Admission_Type":"Scheduled","Readmitted":"1","_deepnote_index_column":"2"},{"Patient_ID":"4","Age":"54","Admission_Type":"Emergency","Readmitted":"1","_deepnote_index_column":"3"},{"Patient_ID":"5","Age":"42","Admission_Type":"Emergency","Readmitted":"0","_deepnote_index_column":"4"},{"Patient_ID":"6","Age":"76","Admission_Type":"Urgent","Readmitted":"1","_deepnote_index_column":"5"},{"Patient_ID":"7","Age":"57","Admission_Type":"Emergency","Readmitted":"1","_deepnote_index_column":"6"},{"Patient_ID":"8","Age":"42","Admission_Type":"Emergency","Readmitted":"1","_deepnote_index_column":"7"},{"Patient_ID":"9","Age":"43","Admission_Type":"Scheduled","Readmitted":"1","_deepnote_index_column":"8"},{"Patient_ID":"10","Age":"27","Admission_Type":"Urgent","Readmitted":"0","_deepnote_index_column":"9"}]},"text/plain":"  Patient_ID Age Admission_Type Readmitted\n0          1  65         Urgent          1\n1          2  21         Urgent          1\n2          3  37      Scheduled          1\n3          4  54      Emergency          1\n4          5  42      Emergency          0\n5          6  76         Urgent          1\n6          7  57      Emergency          1\n7          8  42      Emergency          1\n8          9  43      Scheduled          1\n9         10  27         Urgent          0","text/html":"<div>\n<style scoped>\n    
.dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Patient_ID</th>\n      <th>Age</th>\n      <th>Admission_Type</th>\n      <th>Readmitted</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>1</td>\n      <td>65</td>\n      <td>Urgent</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>2</td>\n      <td>21</td>\n      <td>Urgent</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>3</td>\n      <td>37</td>\n      <td>Scheduled</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>4</td>\n      <td>54</td>\n      <td>Emergency</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>5</td>\n      <td>42</td>\n      <td>Emergency</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>6</td>\n      <td>76</td>\n      <td>Urgent</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>7</td>\n      <td>57</td>\n      <td>Emergency</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>8</td>\n      <td>42</td>\n      <td>Emergency</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>8</th>\n      <td>9</td>\n      <td>43</td>\n      <td>Scheduled</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>9</th>\n      <td>10</td>\n      <td>27</td>\n      <td>Urgent</td>\n      <td>0</td>\n    </tr>\n  </tbody>\n</table>\n</div>"},"metadata":{}}],"execution_count":23},{"cell_type":"code","source":"completion = openai.Completion.create(engine=\"text-davinci-003\", prompt=\"Write example python code that generates synthetic healthcare readmission data stored in a dataframe. 
From this write code that builds a catboost model that predicts readmission outcomes. Also write code to calculate and print performance\", max_tokens=3000)\nprint(completion.choices[0]['text'])","metadata":{"tags":[],"cell_id":"7b8ff97243cb47a280d6a3295141fbe2","allow_embed":"code_output","source_hash":"ad1ce1e2","execution_start":1673364529638,"execution_millis":30591,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[{"name":"stdout","text":" metrics\n\n## Generate Synthetic Healthcare Readmission Data\n\nimport pandas as pd \nimport numpy as np \n\n# set the seed for reproducibility \nnp.random.seed(1)\n\n# create dataframe \ndf = pd.DataFrame(np.random.randint(0, 100, size=(100, 10)), columns=['age','gender','length_of_stay','diagnosis','NIV','laboratory','past_hospitalizations','medications','bmi','readmission'])\n\n# add labels to data frame \ndf['age'] = np.random.randint(20, 80, size=(100))\ndf['gender'] = np.random.randint(1, 2, size=(100))\ndf['length_of_stay'] = np.random.randint(2, 14, size=(100))\ndf['diagnosis'] = np.random.randint(1, 5, size=(100))\ndf['NIV'] = np.random.randint(0, 2, size=(100))\ndf['laboratory'] = np.random.randint(1, 6, size=(100))\ndf['past_hospitalizations'] = np.random.randint(0, 10, size=(100))\ndf['medications'] = np.random.randint(1, 6, size=(100))\ndf['bmi'] = np.random.randint(18, 35, size=(100))\ndf['readmission'] = np.random.randint(0, 2, size=(100))\n\n# print the dataframe \nprint(df)\n\n## Build a CatBoost Model\nfrom catboost import CatBoostClassifier\nfrom sklearn.metrics import confusion_matrix\n\n# separate X and y\nX = df.iloc[:, 0:9]\ny = df.iloc[:, 9]\n\n# initialize catboost classifier \ncat_clf = CatBoostClassifier(iterations=50,\n                            learning_rate=0.3,\n                            depth=8,\n                            eval_metric='Accuracy',\n                            random_seed=42)\n\n# fit the model \ncat_clf.fit(X, y)\n\n# predict values\ny_pred = 
cat_clf.predict(X)\n\n# print confusion matrix\nconf_mat = confusion_matrix(y, y_pred)\nprint(conf_mat)\n\n## Calculate and Print Performance Metrics\nfrom sklearn.metrics import accuracy_score, precision_score, recall_score\n\n# calculate performance metrics\nacc = accuracy_score(y, y_pred)\nprecision = precision_score(y, y_pred)\nrecall = recall_score(y, y_pred)\n\n# print performance metrics \nprint(\"Accuracy: {}\".format(acc))\nprint(\"Precision: {}\".format(precision))\nprint(\"Recall: {}\".format(recall))\n","output_type":"stream"}],"execution_count":27},{"cell_type":"code","source":"## Generate Synthetic Healthcare Readmission Data\n\nimport pandas as pd \nimport numpy as np \n\n# set the seed for reproducibility \nnp.random.seed(1)\n\n# create dataframe \ndf = pd.DataFrame(np.random.randint(0, 100, size=(100, 10)), columns=['age','gender','length_of_stay','diagnosis','NIV','laboratory','past_hospitalizations','medications','bmi','readmission'])\n\n# overwrite each column with random values in a plausible range (NOTE: randint excludes the upper bound, so randint(1, 2) makes gender always 1) \ndf['age'] = np.random.randint(20, 80, size=(100))\ndf['gender'] = np.random.randint(1, 2, size=(100))\ndf['length_of_stay'] = np.random.randint(2, 14, size=(100))\ndf['diagnosis'] = np.random.randint(1, 5, size=(100))\ndf['NIV'] = np.random.randint(0, 2, size=(100))\ndf['laboratory'] = np.random.randint(1, 6, size=(100))\ndf['past_hospitalizations'] = np.random.randint(0, 10, size=(100))\ndf['medications'] = np.random.randint(1, 6, size=(100))\ndf['bmi'] = np.random.randint(18, 35, size=(100))\ndf['readmission'] = np.random.randint(0, 2, size=(100))\n\n# print the dataframe \nprint(df)\n\n## Build a CatBoost Model\nfrom catboost import CatBoostClassifier\nfrom sklearn.metrics import confusion_matrix\n\n# separate X and y\nX = df.iloc[:, 0:9]\ny = df.iloc[:, 9]\n\n# initialize catboost classifier \ncat_clf = CatBoostClassifier(iterations=50,\n                            learning_rate=0.3,\n                            depth=8,\n                            eval_metric='Accuracy',\n       
                     random_seed=42)\n\n# fit the model \ncat_clf.fit(X, y)\n\n# predict values\ny_pred = cat_clf.predict(X)\n\n# print confusion matrix\nconf_mat = confusion_matrix(y, y_pred)\nprint(conf_mat)\n\n## Calculate and Print Performance Metrics\nfrom sklearn.metrics import accuracy_score, precision_score, recall_score\n\n# calculate performance metrics\nacc = accuracy_score(y, y_pred)\nprecision = precision_score(y, y_pred)\nrecall = recall_score(y, y_pred)\n\n# print performance metrics \nprint(\"Accuracy: {}\".format(acc))\nprint(\"Precision: {}\".format(precision))\nprint(\"Recall: {}\".format(recall))","metadata":{"tags":[],"cell_id":"bc718c65f54440b594932296ba2619e2","allow_embed":"code_output","source_hash":"2aa4be45","execution_start":1673364619253,"execution_millis":113,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[{"name":"stdout","text":"    age  gender  length_of_stay  diagnosis  NIV  laboratory  \\\n0    79       1              11          3    1           3   \n1    22       1              13          3    1           1   \n2    30       1               4          4    0           3   \n3    47       1               5          1    1           4   \n4    25       1              10          4    1           4   \n..  ...     ...             ...        ...  ...         ...   
\n95   60       1               5          4    0           4   \n96   71       1               6          3    0           5   \n97   62       1              13          3    0           4   \n98   43       1               6          1    1           5   \n99   71       1               6          1    0           5   \n\n    past_hospitalizations  medications  bmi  readmission  \n0                       7            1   19            1  \n1                       6            1   27            0  \n2                       5            1   18            1  \n3                       7            2   30            1  \n4                       2            4   18            1  \n..                    ...          ...  ...          ...  \n95                      1            1   22            1  \n96                      4            1   32            0  \n97                      6            1   21            1  \n98                      8            3   26            1  \n99                      8            1   29            0  \n\n[100 rows x 10 columns]\n0:\tlearn: 0.6000000\ttotal: 171us\tremaining: 8.42ms\n1:\tlearn: 0.7800000\ttotal: 453us\tremaining: 10.9ms\n2:\tlearn: 0.8400000\ttotal: 681us\tremaining: 10.7ms\n3:\tlearn: 0.8300000\ttotal: 886us\tremaining: 10.2ms\n4:\tlearn: 0.8900000\ttotal: 1.1ms\tremaining: 9.88ms\n5:\tlearn: 0.9100000\ttotal: 1.31ms\tremaining: 9.63ms\n6:\tlearn: 0.9300000\ttotal: 1.52ms\tremaining: 9.32ms\n7:\tlearn: 0.9700000\ttotal: 1.71ms\tremaining: 8.99ms\n8:\tlearn: 0.9600000\ttotal: 1.92ms\tremaining: 8.75ms\n9:\tlearn: 0.9600000\ttotal: 2.01ms\tremaining: 8.04ms\n10:\tlearn: 0.9800000\ttotal: 2.22ms\tremaining: 7.86ms\n11:\tlearn: 0.9900000\ttotal: 2.42ms\tremaining: 7.67ms\n12:\tlearn: 0.9900000\ttotal: 2.61ms\tremaining: 7.44ms\n13:\tlearn: 0.9900000\ttotal: 2.81ms\tremaining: 7.21ms\n14:\tlearn: 0.9900000\ttotal: 3.02ms\tremaining: 7.04ms\n15:\tlearn: 1.0000000\ttotal: 3.22ms\tremaining: 6.84ms\n16:\tlearn: 1.0000000\ttotal: 
3.42ms\tremaining: 6.64ms\n17:\tlearn: 1.0000000\ttotal: 3.64ms\tremaining: 6.47ms\n18:\tlearn: 1.0000000\ttotal: 3.88ms\tremaining: 6.32ms\n19:\tlearn: 1.0000000\ttotal: 4.08ms\tremaining: 6.12ms\n20:\tlearn: 1.0000000\ttotal: 4.27ms\tremaining: 5.89ms\n21:\tlearn: 1.0000000\ttotal: 4.46ms\tremaining: 5.68ms\n22:\tlearn: 1.0000000\ttotal: 4.67ms\tremaining: 5.48ms\n23:\tlearn: 1.0000000\ttotal: 4.9ms\tremaining: 5.3ms\n24:\tlearn: 1.0000000\ttotal: 5.11ms\tremaining: 5.11ms\n25:\tlearn: 1.0000000\ttotal: 5.3ms\tremaining: 4.89ms\n26:\tlearn: 1.0000000\ttotal: 5.5ms\tremaining: 4.69ms\n27:\tlearn: 1.0000000\ttotal: 5.73ms\tremaining: 4.5ms\n28:\tlearn: 1.0000000\ttotal: 5.94ms\tremaining: 4.3ms\n29:\tlearn: 1.0000000\ttotal: 6.14ms\tremaining: 4.09ms\n30:\tlearn: 1.0000000\ttotal: 6.34ms\tremaining: 3.88ms\n31:\tlearn: 1.0000000\ttotal: 6.54ms\tremaining: 3.68ms\n32:\tlearn: 1.0000000\ttotal: 6.75ms\tremaining: 3.48ms\n33:\tlearn: 1.0000000\ttotal: 6.95ms\tremaining: 3.27ms\n34:\tlearn: 1.0000000\ttotal: 7.17ms\tremaining: 3.07ms\n35:\tlearn: 1.0000000\ttotal: 7.37ms\tremaining: 2.87ms\n36:\tlearn: 1.0000000\ttotal: 7.59ms\tremaining: 2.67ms\n37:\tlearn: 1.0000000\ttotal: 7.8ms\tremaining: 2.46ms\n38:\tlearn: 1.0000000\ttotal: 8.01ms\tremaining: 2.26ms\n39:\tlearn: 1.0000000\ttotal: 8.2ms\tremaining: 2.05ms\n40:\tlearn: 1.0000000\ttotal: 8.43ms\tremaining: 1.85ms\n41:\tlearn: 1.0000000\ttotal: 8.64ms\tremaining: 1.65ms\n42:\tlearn: 1.0000000\ttotal: 8.84ms\tremaining: 1.44ms\n43:\tlearn: 1.0000000\ttotal: 9.04ms\tremaining: 1.23ms\n44:\tlearn: 1.0000000\ttotal: 9.24ms\tremaining: 1.03ms\n45:\tlearn: 1.0000000\ttotal: 9.46ms\tremaining: 822us\n46:\tlearn: 1.0000000\ttotal: 9.66ms\tremaining: 616us\n47:\tlearn: 1.0000000\ttotal: 9.87ms\tremaining: 411us\n48:\tlearn: 1.0000000\ttotal: 10.1ms\tremaining: 205us\n49:\tlearn: 1.0000000\ttotal: 10.3ms\tremaining: 0us\n[[53  0]\n [ 0 47]]\nAccuracy: 1.0\nPrecision: 1.0\nRecall: 
1.0\n","output_type":"stream"}],"execution_count":29},{"cell_type":"code","source":"completion = openai.Completion.create(engine=\"text-davinci-003\", prompt=\"Write example python code that generates synthetic transaction data stored in a dataframe\", max_tokens=1000)\nprint(completion.choices[0]['text'])","metadata":{"tags":[],"cell_id":"19a9d0f597574d908151c234cc4c44fe","allow_embed":"code_output","source_hash":"a82ce2a4","execution_start":1673364672681,"execution_millis":8025,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[{"name":"stdout","text":"\n\nimport pandas as pd \nimport numpy as np \n\n#create randomly generated customer data\ncustomer_id = np.arange(1,101) \ncustomer_names = [f'John Doe {x}' for x in range(1,101)] \n\n#create randomly generated transaction data\ntransaction_id = np.arange(1,101)\ndates = [f'2020-07-{x}' for x in range(1,101)]\namounts = np.random.randint(low=1, high=1000, size=(100,)) \n\n#create dataframe with randomly generated data\ntransaction_data = pd.DataFrame({'Customer ID': customer_id, \n                            'Customer Name': customer_names,\n                            'Transaction ID': transaction_id, \n                            'Date': dates, \n                            'Amount': amounts})\n\nprint(transaction_data)\n","output_type":"stream"}],"execution_count":30},{"cell_type":"code","source":"\nimport pandas as pd \nimport numpy as np \n\n#create randomly generated customer data\ncustomer_id = np.arange(1,101) \ncustomer_names = [f'John Doe {x}' for x in range(1,101)] \n\n#create randomly generated transaction data\ntransaction_id = np.arange(1,101)\ndates = [f'2020-07-{x}' for x in range(1,101)]\namounts = np.random.randint(low=1, high=1000, size=(100,)) \n\n#create dataframe with randomly generated data\ntransaction_data = pd.DataFrame({'Customer ID': customer_id, \n                            'Customer Name': customer_names,\n                            'Transaction ID': 
transaction_id, \n                            'Date': dates, \n                            'Amount': amounts})\n\ntransaction_data","metadata":{"tags":[],"cell_id":"d786bdb042b744c2bda8eb6004a0ff77","allow_embed":"code_output","source_hash":"1307dbc7","execution_start":1673364707400,"execution_millis":52,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","execution_count":31,"data":{"application/vnd.deepnote.dataframe.v3+json":{"column_count":5,"row_count":100,"columns":[{"name":"Customer ID","dtype":"int64","stats":{"unique_count":100,"nan_count":0,"min":"1","max":"100","histogram":[{"bin_start":1,"bin_end":10.9,"count":10},{"bin_start":10.9,"bin_end":20.8,"count":10},{"bin_start":20.8,"bin_end":30.700000000000003,"count":10},{"bin_start":30.700000000000003,"bin_end":40.6,"count":10},{"bin_start":40.6,"bin_end":50.5,"count":10},{"bin_start":50.5,"bin_end":60.400000000000006,"count":10},{"bin_start":60.400000000000006,"bin_end":70.3,"count":10},{"bin_start":70.3,"bin_end":80.2,"count":10},{"bin_start":80.2,"bin_end":90.10000000000001,"count":10},{"bin_start":90.10000000000001,"bin_end":100,"count":10}]}},{"name":"Customer Name","dtype":"object","stats":{"unique_count":100,"nan_count":0,"categories":[{"name":"John Doe 1","count":1},{"name":"John Doe 2","count":1},{"name":"98 others","count":98}]}},{"name":"Transaction 
ID","dtype":"int64","stats":{"unique_count":100,"nan_count":0,"min":"1","max":"100","histogram":[{"bin_start":1,"bin_end":10.9,"count":10},{"bin_start":10.9,"bin_end":20.8,"count":10},{"bin_start":20.8,"bin_end":30.700000000000003,"count":10},{"bin_start":30.700000000000003,"bin_end":40.6,"count":10},{"bin_start":40.6,"bin_end":50.5,"count":10},{"bin_start":50.5,"bin_end":60.400000000000006,"count":10},{"bin_start":60.400000000000006,"bin_end":70.3,"count":10},{"bin_start":70.3,"bin_end":80.2,"count":10},{"bin_start":80.2,"bin_end":90.10000000000001,"count":10},{"bin_start":90.10000000000001,"bin_end":100,"count":10}]}},{"name":"Date","dtype":"object","stats":{"unique_count":100,"nan_count":0,"categories":[{"name":"2020-07-1","count":1},{"name":"2020-07-2","count":1},{"name":"98 others","count":98}]}},{"name":"Amount","dtype":"int64","stats":{"unique_count":95,"nan_count":0,"min":"1","max":"999","histogram":[{"bin_start":1,"bin_end":100.8,"count":7},{"bin_start":100.8,"bin_end":200.6,"count":6},{"bin_start":200.6,"bin_end":300.4,"count":16},{"bin_start":300.4,"bin_end":400.2,"count":9},{"bin_start":400.2,"bin_end":500,"count":7},{"bin_start":500,"bin_end":599.8,"count":8},{"bin_start":599.8,"bin_end":699.6,"count":10},{"bin_start":699.6,"bin_end":799.4,"count":9},{"bin_start":799.4,"bin_end":899.1999999999999,"count":13},{"bin_start":899.1999999999999,"bin_end":999,"count":15}]}},{"name":"_deepnote_index_column","dtype":"int64"}],"rows":[{"Customer ID":"1","Customer Name":"John Doe 1","Transaction ID":"1","Date":"2020-07-1","Amount":"138","_deepnote_index_column":"0"},{"Customer ID":"2","Customer Name":"John Doe 2","Transaction ID":"2","Date":"2020-07-2","Amount":"373","_deepnote_index_column":"1"},{"Customer ID":"3","Customer Name":"John Doe 3","Transaction ID":"3","Date":"2020-07-3","Amount":"751","_deepnote_index_column":"2"},{"Customer ID":"4","Customer Name":"John Doe 4","Transaction 
ID":"4","Date":"2020-07-4","Amount":"385","_deepnote_index_column":"3"},{"Customer ID":"5","Customer Name":"John Doe 5","Transaction ID":"5","Date":"2020-07-5","Amount":"744","_deepnote_index_column":"4"},{"Customer ID":"6","Customer Name":"John Doe 6","Transaction ID":"6","Date":"2020-07-6","Amount":"56","_deepnote_index_column":"5"},{"Customer ID":"7","Customer Name":"John Doe 7","Transaction ID":"7","Date":"2020-07-7","Amount":"492","_deepnote_index_column":"6"},{"Customer ID":"8","Customer Name":"John Doe 8","Transaction ID":"8","Date":"2020-07-8","Amount":"622","_deepnote_index_column":"7"},{"Customer ID":"9","Customer Name":"John Doe 9","Transaction ID":"9","Date":"2020-07-9","Amount":"582","_deepnote_index_column":"8"},{"Customer ID":"10","Customer Name":"John Doe 10","Transaction ID":"10","Date":"2020-07-10","Amount":"267","_deepnote_index_column":"9"}]},"text/plain":"    Customer ID Customer Name  Transaction ID         Date  Amount\n0             1    John Doe 1               1    2020-07-1     138\n1             2    John Doe 2               2    2020-07-2     373\n2             3    John Doe 3               3    2020-07-3     751\n3             4    John Doe 4               4    2020-07-4     385\n4             5    John Doe 5               5    2020-07-5     744\n..          ...           ...             ...          ...     
...\n95           96   John Doe 96              96   2020-07-96     895\n96           97   John Doe 97              97   2020-07-97     984\n97           98   John Doe 98              98   2020-07-98     424\n98           99   John Doe 99              99   2020-07-99     294\n99          100  John Doe 100             100  2020-07-100     391\n\n[100 rows x 5 columns]","text/html":"<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Customer ID</th>\n      <th>Customer Name</th>\n      <th>Transaction ID</th>\n      <th>Date</th>\n      <th>Amount</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>1</td>\n      <td>John Doe 1</td>\n      <td>1</td>\n      <td>2020-07-1</td>\n      <td>138</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>2</td>\n      <td>John Doe 2</td>\n      <td>2</td>\n      <td>2020-07-2</td>\n      <td>373</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>3</td>\n      <td>John Doe 3</td>\n      <td>3</td>\n      <td>2020-07-3</td>\n      <td>751</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>4</td>\n      <td>John Doe 4</td>\n      <td>4</td>\n      <td>2020-07-4</td>\n      <td>385</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>5</td>\n      <td>John Doe 5</td>\n      <td>5</td>\n      <td>2020-07-5</td>\n      <td>744</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>95</th>\n      <td>96</td>\n      <td>John Doe 96</td>\n      <td>96</td>\n      <td>2020-07-96</td>\n      <td>895</td>\n    </tr>\n    <tr>\n      <th>96</th>\n      <td>97</td>\n      
<td>John Doe 97</td>\n      <td>97</td>\n      <td>2020-07-97</td>\n      <td>984</td>\n    </tr>\n    <tr>\n      <th>97</th>\n      <td>98</td>\n      <td>John Doe 98</td>\n      <td>98</td>\n      <td>2020-07-98</td>\n      <td>424</td>\n    </tr>\n    <tr>\n      <th>98</th>\n      <td>99</td>\n      <td>John Doe 99</td>\n      <td>99</td>\n      <td>2020-07-99</td>\n      <td>294</td>\n    </tr>\n    <tr>\n      <th>99</th>\n      <td>100</td>\n      <td>John Doe 100</td>\n      <td>100</td>\n      <td>2020-07-100</td>\n      <td>391</td>\n    </tr>\n  </tbody>\n</table>\n<p>100 rows × 5 columns</p>\n</div>"},"metadata":{}}],"execution_count":31},{"cell_type":"code","source":"completion = openai.Completion.create(engine=\"text-davinci-003\", prompt=\"Write example python code that generates synthetic transaction data stored in a dataframe. Include customer ID, transaction amount, item ID, item name, age, gender, and zipcode\", max_tokens=2000)\nprint(completion.choices[0]['text'])","metadata":{"tags":[],"cell_id":"806cdf0729bf4fff9a912247c8352cfc","allow_embed":"code_output","source_hash":"ae8986e5","execution_start":1673364810801,"execution_millis":13798,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[{"name":"stdout","text":"\n\n\nimport pandas as pd\nimport numpy as np\n\nrows = ['customer_ID', 'transaction_amnt', 'item_ID', 'item_name', 'age', 'gender', 'zipcode']\n\ndata = pd.DataFrame(columns=rows)  \n\nfor i in range(1,100):\n        customer_ID = int( np.random.uniform(100,600-100)) \n        transaction_amnt = np.random.uniform(1.25, 10.00)\n        item_ID = int( np.random.uniform(1,35))\n        item_name = np.random.choice([\"phone\", \"tablet\", \"laptop\", \"smartwatch\"])\n        age = int( np.random.uniform(17,75)) \n        gender = np.random.choice([\"male\", \"female\"]) \n        zipcode = np.random.choice([\"98101\", \"98200\", \"98469\", \"98801\"])\n        data.loc[i] = [customer_ID, transaction_amnt, item_ID, 
item_name, age, gender, zipcode]\n\nprint (data)\n","output_type":"stream"}],"execution_count":34},{"cell_type":"code","source":"import pandas as pd\nimport numpy as np\n\nrows = ['customer_ID', 'transaction_amnt', 'item_ID', 'item_name', 'age', 'gender', 'zipcode']\n\ndata = pd.DataFrame(columns=rows)  \n\nfor i in range(1,100):\n        customer_ID = int( np.random.uniform(100,600-100)) \n        transaction_amnt = np.random.uniform(1.25, 10.00)\n        item_ID = int( np.random.uniform(1,35))\n        item_name = np.random.choice([\"phone\", \"tablet\", \"laptop\", \"smartwatch\"])\n        age = int( np.random.uniform(17,75)) \n        gender = np.random.choice([\"male\", \"female\"]) \n        zipcode = np.random.choice([\"98101\", \"98200\", \"98469\", \"98801\"])\n        data.loc[i] = [customer_ID, transaction_amnt, item_ID, item_name, age, gender, zipcode]\n\ndata\n","metadata":{"tags":[],"cell_id":"8baa29620421412782c494da99099734","allow_embed":"code_output","source_hash":"e22e09cb","execution_start":1673364853257,"execution_millis":248,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","execution_count":35,"data":{"application/vnd.deepnote.dataframe.v3+json":{"column_count":7,"row_count":99,"columns":[{"name":"customer_ID","dtype":"object","stats":{"unique_count":87,"nan_count":0,"categories":[{"name":"322","count":2},{"name":"146","count":2},{"name":"85 
others","count":95}]}},{"name":"transaction_amnt","dtype":"float64","stats":{"unique_count":99,"nan_count":0,"min":"1.3676709942205518","max":"9.987054437035802","histogram":[{"bin_start":1.3676709942205518,"bin_end":2.229609338502077,"count":9},{"bin_start":2.229609338502077,"bin_end":3.091547682783602,"count":8},{"bin_start":3.091547682783602,"bin_end":3.953486027065127,"count":9},{"bin_start":3.953486027065127,"bin_end":4.815424371346652,"count":15},{"bin_start":4.815424371346652,"bin_end":5.677362715628177,"count":12},{"bin_start":5.677362715628177,"bin_end":6.539301059909702,"count":8},{"bin_start":6.539301059909702,"bin_end":7.401239404191227,"count":8},{"bin_start":7.401239404191227,"bin_end":8.263177748472753,"count":10},{"bin_start":8.263177748472753,"bin_end":9.125116092754277,"count":9},{"bin_start":9.125116092754277,"bin_end":9.987054437035802,"count":11}]}},{"name":"item_ID","dtype":"object","stats":{"unique_count":32,"nan_count":0,"categories":[{"name":"32","count":7},{"name":"14","count":6},{"name":"30 others","count":86}]}},{"name":"item_name","dtype":"object","stats":{"unique_count":4,"nan_count":0,"categories":[{"name":"tablet","count":28},{"name":"laptop","count":24},{"name":"2 others","count":47}]}},{"name":"age","dtype":"object","stats":{"unique_count":47,"nan_count":0,"categories":[{"name":"53","count":5},{"name":"26","count":4},{"name":"45 others","count":90}]}},{"name":"gender","dtype":"object","stats":{"unique_count":2,"nan_count":0,"categories":[{"name":"female","count":50},{"name":"male","count":49}]}},{"name":"zipcode","dtype":"object","stats":{"unique_count":4,"nan_count":0,"categories":[{"name":"98101","count":28},{"name":"98801","count":25},{"name":"2 
others","count":46}]}},{"name":"_deepnote_index_column","dtype":"int64"}],"rows":[{"customer_ID":"321","transaction_amnt":"4.713675785008061","item_ID":"25","item_name":"laptop","age":"26","gender":"female","zipcode":"98101","_deepnote_index_column":"1"},{"customer_ID":"128","transaction_amnt":"9.813387045135537","item_ID":"9","item_name":"smartwatch","age":"43","gender":"male","zipcode":"98469","_deepnote_index_column":"2"},{"customer_ID":"490","transaction_amnt":"4.214857963236753","item_ID":"31","item_name":"phone","age":"48","gender":"female","zipcode":"98200","_deepnote_index_column":"3"},{"customer_ID":"322","transaction_amnt":"5.9467228169965605","item_ID":"14","item_name":"phone","age":"53","gender":"female","zipcode":"98801","_deepnote_index_column":"4"},{"customer_ID":"162","transaction_amnt":"1.6692301678150834","item_ID":"18","item_name":"laptop","age":"71","gender":"male","zipcode":"98200","_deepnote_index_column":"5"},{"customer_ID":"491","transaction_amnt":"5.1088079285776224","item_ID":"26","item_name":"tablet","age":"26","gender":"male","zipcode":"98101","_deepnote_index_column":"6"},{"customer_ID":"170","transaction_amnt":"9.780168421011425","item_ID":"10","item_name":"tablet","age":"61","gender":"male","zipcode":"98101","_deepnote_index_column":"7"},{"customer_ID":"319","transaction_amnt":"8.258702363235615","item_ID":"23","item_name":"tablet","age":"23","gender":"female","zipcode":"98101","_deepnote_index_column":"8"},{"customer_ID":"302","transaction_amnt":"3.994470838038586","item_ID":"32","item_name":"laptop","age":"65","gender":"female","zipcode":"98801","_deepnote_index_column":"9"},{"customer_ID":"489","transaction_amnt":"9.987054437035802","item_ID":"5","item_name":"laptop","age":"73","gender":"male","zipcode":"98801","_deepnote_index_column":"10"}]},"text/plain":"   customer_ID  transaction_amnt item_ID   item_name age  gender zipcode\n1          321          4.713676      25      laptop  26  female   98101\n2          128          
9.813387       9  smartwatch  43    male   98469\n3          490          4.214858      31       phone  48  female   98200\n4          322          5.946723      14       phone  53  female   98801\n5          162          1.669230      18      laptop  71    male   98200\n..         ...               ...     ...         ...  ..     ...     ...\n95         195          9.636766      13       phone  47    male   98801\n96         425          8.315732      22  smartwatch  49  female   98101\n97         146          1.455586      19  smartwatch  69  female   98101\n98         438          8.426772      17       phone  26  female   98101\n99         246          4.782375       4      tablet  28    male   98101\n\n[99 rows x 7 columns]","text/html":"<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>customer_ID</th>\n      <th>transaction_amnt</th>\n      <th>item_ID</th>\n      <th>item_name</th>\n      <th>age</th>\n      <th>gender</th>\n      <th>zipcode</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>1</th>\n      <td>321</td>\n      <td>4.713676</td>\n      <td>25</td>\n      <td>laptop</td>\n      <td>26</td>\n      <td>female</td>\n      <td>98101</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>128</td>\n      <td>9.813387</td>\n      <td>9</td>\n      <td>smartwatch</td>\n      <td>43</td>\n      <td>male</td>\n      <td>98469</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>490</td>\n      <td>4.214858</td>\n      <td>31</td>\n      <td>phone</td>\n      <td>48</td>\n      <td>female</td>\n      <td>98200</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>322</td>\n      <td>5.946723</td>\n      <td>14</td>\n      
<td>phone</td>\n      <td>53</td>\n      <td>female</td>\n      <td>98801</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>162</td>\n      <td>1.669230</td>\n      <td>18</td>\n      <td>laptop</td>\n      <td>71</td>\n      <td>male</td>\n      <td>98200</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>95</th>\n      <td>195</td>\n      <td>9.636766</td>\n      <td>13</td>\n      <td>phone</td>\n      <td>47</td>\n      <td>male</td>\n      <td>98801</td>\n    </tr>\n    <tr>\n      <th>96</th>\n      <td>425</td>\n      <td>8.315732</td>\n      <td>22</td>\n      <td>smartwatch</td>\n      <td>49</td>\n      <td>female</td>\n      <td>98101</td>\n    </tr>\n    <tr>\n      <th>97</th>\n      <td>146</td>\n      <td>1.455586</td>\n      <td>19</td>\n      <td>smartwatch</td>\n      <td>69</td>\n      <td>female</td>\n      <td>98101</td>\n    </tr>\n    <tr>\n      <th>98</th>\n      <td>438</td>\n      <td>8.426772</td>\n      <td>17</td>\n      <td>phone</td>\n      <td>26</td>\n      <td>female</td>\n      <td>98101</td>\n    </tr>\n    <tr>\n      <th>99</th>\n      <td>246</td>\n      <td>4.782375</td>\n      <td>4</td>\n      <td>tablet</td>\n      <td>28</td>\n      <td>male</td>\n      <td>98101</td>\n    </tr>\n  </tbody>\n</table>\n<p>99 rows × 7 columns</p>\n</div>"},"metadata":{}}],"execution_count":35},{"cell_type":"code","source":"completion = openai.Completion.create(engine=\"text-davinci-003\", prompt=\" list some good public datasets\", max_tokens=1000)\nprint(completion.choices[0]['text'])","metadata":{"tags":[],"cell_id":"2aaf89e048084118b09d5dc8d5973997","allow_embed":"code_output","source_hash":"799d7981","execution_start":1673364891721,"execution_millis":2602,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[{"name":"stdout","text":"\n\n1. 
US Census Data\n2. Enron Email Dataset\n3. Global Open Data Index\n4. Air Quality Monitoring Data\n5. New York City Taxi Trip Data\n6. IMF Data\n7. World Bank Open Data\n8. Google Books Ngrams Dataset\n9. Amazon Reviews Dataset\n10. UCI Machine Learning Repository\n","output_type":"stream"}],"execution_count":36},{"cell_type":"code","source":"completion = openai.Completion.create(engine=\"text-davinci-003\", prompt=\" list some good public datasets under apache 2.0 license. provide links to their source\", max_tokens=1000, temperature=0)\nprint(completion.choices[0]['text'])","metadata":{"tags":[],"cell_id":"dc005a6ff705440382a1d7733cad7653","allow_embed":"code_output","source_hash":"763a0cd0","execution_start":1673364936215,"execution_millis":7205,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[{"name":"stdout","text":"\n\n1. OpenStreetMap: https://www.openstreetmap.org/\n2. US Census Data: https://www.census.gov/data.html\n3. Google Books Ngrams: https://aws.amazon.com/datasets/google-books-ngrams/\n4. Wikipedia: https://dumps.wikimedia.org/enwiki/\n5. US Government Spending Data: https://www.usaspending.gov/\n6. World Bank Open Data: https://data.worldbank.org/\n7. Common Crawl: http://commoncrawl.org/\n8. Open Images: https://storage.googleapis.com/openimages/web/index.html\n9. OpenFlights: https://openflights.org/data.html\n10. GDELT: http://data.gdeltproject.org/\n","output_type":"stream"}],"execution_count":38},{"cell_type":"code","source":"completion = openai.Completion.create(engine=\"text-davinci-003\", prompt=\" list some good public datasets under apache 2.0 license. 
provide links to their source and descriptions\", max_tokens=1000, temperature=0)\nprint(completion.choices[0]['text'])","metadata":{"tags":[],"cell_id":"8b63efe556cf4310ab44b844c761d2f5","allow_embed":"code_output","source_hash":"d9e6cfd6","execution_start":1673364954767,"execution_millis":11736,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[{"name":"stdout","text":"\n\n1. OpenStreetMap: OpenStreetMap is a free, editable map of the world, created and maintained by volunteers and available for use under an open license. It contains millions of data points, including roads, buildings, and points of interest. Source: https://www.openstreetmap.org/\n\n2. Google Books Ngrams: Google Books Ngrams is a dataset of over 5 million books from Google Books, spanning from 1500 to 2008. It contains word counts for each year, allowing researchers to track the usage of words over time. Source: https://aws.amazon.com/datasets/google-books-ngrams/\n\n3. Wikipedia: Wikipedia is a free, open-source encyclopedia with millions of articles in hundreds of languages. It is available for use under the Creative Commons Attribution-ShareAlike license. Source: https://www.wikipedia.org/\n\n4. Common Crawl: Common Crawl is a large-scale web crawl that collects data from over 5 billion webpages. It is available for use under the Apache 2.0 license. Source: https://commoncrawl.org/\n\n5. Open Images Dataset: The Open Images Dataset is a collection of 9 million images annotated with labels spanning over 6000 categories. It is available for use under the Apache 2.0 license. 
Source: https://storage.googleapis.com/openimages/web/index.html\n","output_type":"stream"}],"execution_count":39},{"cell_type":"code","source":"completion = openai.Completion.create(engine=\"text-davinci-003\", prompt=\"What are some emerging machine learning use-cases in social media?\", max_tokens=1000, temperature=0)\nprint(completion.choices[0]['text'])","metadata":{"tags":[],"cell_id":"5eadc317599a441d886169d26d2060c1","allow_embed":"code_output","source_hash":"fa7443c9","execution_start":1673365110828,"execution_millis":4087,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[{"name":"stdout","text":"\n\n1. Automated Content Curation: Automatically curating content from social media platforms to create personalized content feeds for users.\n\n2. Sentiment Analysis: Analyzing user sentiment from social media posts to gain insights into customer opinions and preferences.\n\n3. Social Media Monitoring: Using machine learning algorithms to monitor social media conversations and detect potential issues or trends.\n\n4. Social Media Advertising: Leveraging machine learning to optimize social media advertising campaigns and target the right audience.\n\n5. Social Media Recommendations: Using machine learning to recommend content to users based on their interests and preferences.\n","output_type":"stream"}],"execution_count":null},{"cell_type":"code","source":"completion = openai.Completion.create(engine=\"text-davinci-003\", prompt=\"What are some emerging machine learning use-cases in healthcare?\", max_tokens=1000, temperature=0)\nprint(completion.choices[0]['text'])","metadata":{"tags":[],"cell_id":"a08e5d3197504ba499f8de9b63c83090","allow_embed":"code_output","source_hash":"f3e455d","execution_start":1673365252249,"execution_millis":5894,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[{"name":"stdout","text":"\n\n1. 
Automated Diagnosis: Machine learning algorithms can be used to analyze patient data and medical images to detect and diagnose diseases.\n\n2. Personalized Medicine: Machine learning algorithms can be used to analyze patient data and medical images to create personalized treatment plans for each patient.\n\n3. Drug Discovery: Machine learning algorithms can be used to analyze large datasets of chemical compounds to identify potential new drugs.\n\n4. Clinical Decision Support: Machine learning algorithms can be used to analyze patient data and medical images to provide clinicians with real-time decision support.\n\n5. Predictive Analytics: Machine learning algorithms can be used to analyze patient data and medical images to predict future health outcomes.\n","output_type":"stream"}],"execution_count":44},{"cell_type":"code","source":"completion = openai.Completion.create(engine=\"text-davinci-003\", prompt=\"What are some good research questions on using deep learning for image detection?\", max_tokens=1000, temperature=0)\nprint(completion.choices[0]['text'])","metadata":{"tags":[],"cell_id":"77d3e37fec824ba69d71991b9c73298e","allow_embed":"code_output","source_hash":"543c1ba7","execution_start":1673365541881,"execution_millis":7904,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[{"name":"stdout","text":"\n\n1. What are the most effective deep learning architectures for image detection?\n2. How can deep learning be used to improve the accuracy of image detection?\n3. What are the most effective methods for training deep learning models for image detection?\n4. How can deep learning be used to detect objects in images with varying levels of complexity?\n5. How can deep learning be used to detect objects in images with varying levels of illumination?\n6. How can deep learning be used to detect objects in images with varying levels of noise?\n7. How can deep learning be used to detect objects in images with varying levels of resolution?\n8. 
How can deep learning be used to detect objects in images with varying levels of occlusion?\n9. How can deep learning be used to detect objects in images with varying levels of background clutter?\n10. How can deep learning be used to detect objects in images with varying levels of rotation?\n","output_type":"stream"}],"execution_count":45},{"cell_type":"code","source":"completion = openai.Completion.create(engine=\"text-davinci-003\", prompt=\"What are some good research questions related to NLP transformer models?\", max_tokens=1000, temperature=0)\nprint(completion.choices[0]['text'])","metadata":{"tags":[],"cell_id":"959960a41e784d279afadf9e891c0377","allow_embed":"code_output","source_hash":"649001ea","execution_start":1673365855245,"execution_millis":8775,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[{"name":"stdout","text":"\n\n1. How can transformer models be used to improve the accuracy of natural language processing tasks?\n2. What are the most effective methods for training transformer models for natural language processing tasks?\n3. How can transformer models be used to improve the efficiency of natural language processing tasks?\n4. What are the most effective methods for optimizing transformer models for natural language processing tasks?\n5. How can transformer models be used to improve the interpretability of natural language processing tasks?\n6. What are the most effective methods for deploying transformer models for natural language processing tasks?\n7. How can transformer models be used to improve the scalability of natural language processing tasks?\n8. What are the most effective methods for combining transformer models with other natural language processing techniques?\n9. How can transformer models be used to improve the robustness of natural language processing tasks?\n10. 
What are the most effective methods for evaluating transformer models for natural language processing tasks?\n","output_type":"stream"}],"execution_count":49},{"cell_type":"markdown","source":"<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=f655b25a-49ff-4873-9275-55f22845e8df' target=\"_blank\">\n<img alt='Created in deepnote.com' style='display:inline;max-height:16px;margin:0px;margin-right:7.5px;' src='data:image/svg+xml;base64,PD94bWwgdmVyc2lvbj0iMS4wIiBlbmNvZGluZz0iVVRGLTgiPz4KPHN2ZyB3aWR0aD0iODBweCIgaGVpZ2h0PSI4MHB4IiB2aWV3Qm94PSIwIDAgODAgODAiIHZlcnNpb249IjEuMSIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIiB4bWxuczp4bGluaz0iaHR0cDovL3d3dy53My5vcmcvMTk5OS94bGluayI+CiAgICA8IS0tIEdlbmVyYXRvcjogU2tldGNoIDU0LjEgKDc2NDkwKSAtIGh0dHBzOi8vc2tldGNoYXBwLmNvbSAtLT4KICAgIDx0aXRsZT5Hcm91cCAzPC90aXRsZT4KICAgIDxkZXNjPkNyZWF0ZWQgd2l0aCBTa2V0Y2guPC9kZXNjPgogICAgPGcgaWQ9IkxhbmRpbmciIHN0cm9rZT0ibm9uZSIgc3Ryb2tlLXdpZHRoPSIxIiBmaWxsPSJub25lIiBmaWxsLXJ1bGU9ImV2ZW5vZGQiPgogICAgICAgIDxnIGlkPSJBcnRib2FyZCIgdHJhbnNmb3JtPSJ0cmFuc2xhdGUoLTEyMzUuMDAwMDAwLCAtNzkuMDAwMDAwKSI+CiAgICAgICAgICAgIDxnIGlkPSJHcm91cC0zIiB0cmFuc2Zvcm09InRyYW5zbGF0ZSgxMjM1LjAwMDAwMCwgNzkuMDAwMDAwKSI+CiAgICAgICAgICAgICAgICA8cG9seWdvbiBpZD0iUGF0aC0yMCIgZmlsbD0iIzAyNjVCNCIgcG9pbnRzPSIyLjM3NjIzNzYyIDgwIDM4LjA0NzY2NjcgODAgNTcuODIxNzgyMiA3My44MDU3NTkyIDU3LjgyMTc4MjIgMzIuNzU5MjczOSAzOS4xNDAyMjc4IDMxLjY4MzE2ODMiPjwvcG9seWdvbj4KICAgICAgICAgICAgICAgIDxwYXRoIGQ9Ik0zNS4wMDc3MTgsODAgQzQyLjkwNjIwMDcsNzYuNDU0OTM1OCA0Ny41NjQ5MTY3LDcxLjU0MjI2NzEgNDguOTgzODY2LDY1LjI2MTk5MzkgQzUxLjExMjI4OTksNTUuODQxNTg0MiA0MS42NzcxNzk1LDQ5LjIxMjIyODQgMjUuNjIzOTg0Niw0OS4yMTIyMjg0IEMyNS40ODQ5Mjg5LDQ5LjEyNjg0NDggMjkuODI2MTI5Niw0My4yODM4MjQ4IDM4LjY0NzU4NjksMzEuNjgzMTY4MyBMNzIuODcxMjg3MSwzMi41NTQ0MjUgTDY1LjI4MDk3Myw2Ny42NzYzNDIxIEw1MS4xMTIyODk5LDc3LjM3NjE0NCBMMzUuMDA3NzE4LDgwIFoiIGlkPSJQYXRoLTIyIiBmaWxsPSIjMDAyODY4Ij48L3BhdG
g+CiAgICAgICAgICAgICAgICA8cGF0aCBkPSJNMCwzNy43MzA0NDA1IEwyNy4xMTQ1MzcsMC4yNTcxMTE0MzYgQzYyLjM3MTUxMjMsLTEuOTkwNzE3MDEgODAsMTAuNTAwMzkyNyA4MCwzNy43MzA0NDA1IEM4MCw2NC45NjA0ODgyIDY0Ljc3NjUwMzgsNzkuMDUwMzQxNCAzNC4zMjk1MTEzLDgwIEM0Ny4wNTUzNDg5LDc3LjU2NzA4MDggNTMuNDE4MjY3Nyw3MC4zMTM2MTAzIDUzLjQxODI2NzcsNTguMjM5NTg4NSBDNTMuNDE4MjY3Nyw0MC4xMjg1NTU3IDM2LjMwMzk1NDQsMzcuNzMwNDQwNSAyNS4yMjc0MTcsMzcuNzMwNDQwNSBDMTcuODQzMDU4NiwzNy43MzA0NDA1IDkuNDMzOTE5NjYsMzcuNzMwNDQwNSAwLDM3LjczMDQ0MDUgWiIgaWQ9IlBhdGgtMTkiIGZpbGw9IiMzNzkzRUYiPjwvcGF0aD4KICAgICAgICAgICAgPC9nPgogICAgICAgIDwvZz4KICAgIDwvZz4KPC9zdmc+' > </img>\nCreated in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>","metadata":{"tags":[],"created_in_deepnote_cell":true,"deepnote_cell_type":"markdown"}}],"nbformat":4,"nbformat_minor":0,"metadata":{"deepnote":{},"orig_nbformat":2,"deepnote_notebook_id":"226ddc2337ad4c80a9c176feb573faf1","deepnote_persisted_session":{"createdAt":"2023-01-10T14:45:50.913Z"},"deepnote_execution_queue":[]}}


--------------------------------------------------------------------------------
/imputer.ipynb:
--------------------------------------------------------------------------------
1 | {"cells":[{"cell_type":"code","source":"import pandas as pd \nimport functools\nfrom sklearn.experimental import enable_iterative_imputer\nfrom sklearn.impute import IterativeImputer\nimport numpy as np ","metadata":{"tags":[],"cell_id":"d62b0e3149f34b6da05e2daf6d0f7534","allow_embed":"code_output","source_hash":"5072b50a","execution_start":1666884400995,"execution_millis":1573,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[],"execution_count":1},{"cell_type":"code","source":"# impute with zero\ndef simple_imputation(input_function):\n    @functools.wraps(input_function)\n    def simple_imputation_wrapper(*args, **kwargs):\n        return_value = input_function(*args, **kwargs)\n        print(\"--------------Before Imputation--------------\")\n        print(return_value.isnull().sum(axis = 0))\n        return_value.fillna(0, inplace = True)\n        print(\"--------------After Imputation--------------\")\n        print(return_value.isnull().sum(axis = 0))\n        return return_value\n    return simple_imputation_wrapper","metadata":{"tags":[],"cell_id":"0a9a725472834fbeb7b5214be47e6d29","allow_embed":"code_output","source_hash":"c0cc44af","execution_start":1666884402575,"execution_millis":3,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[],"execution_count":2},{"cell_type":"code","source":"","metadata":{"tags":[],"cell_id":"bc8e45f3df6d4383b659a0d84928a63f","source_hash":"b623e53d","execution_start":1666884402582,"execution_millis":2,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[],"execution_count":2},{"cell_type":"code","source":"@simple_imputation\ndef read_data():\n    df = pd.read_csv(\"wines_data.csv\", sep = \";\")\n    return 
df","metadata":{"tags":[],"cell_id":"5e138fd1e3af494c82e86f7ba4731199","source_hash":"e271671b","execution_start":1666884402588,"execution_millis":2,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[],"execution_count":3},{"cell_type":"code","source":" read_data()","metadata":{"tags":[],"cell_id":"dc9cff612f244c76b95fdb252973f1f8","allow_embed":"code_output","source_hash":"6fc175e1","execution_start":1666884402642,"execution_millis":1460,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[{"name":"stdout","text":"--------------Before Imputation--------------\ncountry                 2\ndesignation         43826\npoints                  5\nprice               13396\nprovince                7\nregion_1            23845\nregion_2            85659\nvariety                 5\nwinery                  5\nlast_year_points        0\ndtype: int64\n--------------After Imputation--------------\ncountry             0\ndesignation         0\npoints              0\nprice               0\nprovince            0\nregion_1            0\nregion_2            0\nvariety             0\nwinery              0\nlast_year_points    0\ndtype: int64\n","output_type":"stream"},{"output_type":"execute_result","execution_count":4,"data":{"application/vnd.deepnote.dataframe.v3+json":{"column_count":10,"row_count":144037,"columns":[{"name":"country","dtype":"object"},{"name":"designation","dtype":"object"},{"name":"points","dtype":"float64"},{"name":"price","dtype":"float64"},{"name":"province","dtype":"object"},{"name":"region_1","dtype":"object"},{"name":"region_2","dtype":"object"},{"name":"variety","dtype":"object"},{"name":"winery","dtype":"object"},{"name":"last_year_points","dtype":"int64"},{"name":"_deepnote_index_column","dtype":"int64"}],"rows":[{"country":"US","designation":"Martha's Vineyard","points":"96.0","price":"235.0","province":"California","region_1":"Napa Valley","region_2":"Napa","variety":"Cabernet 
Sauvignon","winery":"Heitz","last_year_points":"94","_deepnote_index_column":"0"},{"country":"Spain","designation":"Carodorum Selección Especial Reserva","points":"96.0","price":"110.0","province":"Northern Spain","region_1":"Toro","region_2":"0","variety":"Tinta de Toro","winery":"Bodega Carmen Rodríguez","last_year_points":"92","_deepnote_index_column":"1"},{"country":"US","designation":"Special Selected Late Harvest","points":"96.0","price":"90.0","province":"California","region_1":"Knights Valley","region_2":"Sonoma","variety":"Sauvignon Blanc","winery":"Macauley","last_year_points":"100","_deepnote_index_column":"2"},{"country":"US","designation":"Reserve","points":"96.0","price":"65.0","province":"Oregon","region_1":"Willamette Valley","region_2":"Willamette Valley","variety":"Pinot Noir","winery":"Ponzi","last_year_points":"94","_deepnote_index_column":"3"},{"country":"France","designation":"La Brûlade","points":"95.0","price":"66.0","province":"Provence","region_1":"Bandol","region_2":"0","variety":"Provence red blend","winery":"Domaine de la Bégude","last_year_points":"94","_deepnote_index_column":"4"},{"country":"Spain","designation":"Numanthia","points":"95.0","price":"73.0","province":"Northern Spain","region_1":"Toro","region_2":"0","variety":"Tinta de Toro","winery":"Numanthia","last_year_points":"94","_deepnote_index_column":"5"},{"country":"Spain","designation":"San Román","points":"95.0","price":"65.0","province":"Northern Spain","region_1":"Toro","region_2":"0","variety":"Tinta de Toro","winery":"Maurodos","last_year_points":"89","_deepnote_index_column":"6"},{"country":"Spain","designation":"Carodorum Único Crianza","points":"95.0","price":"110.0","province":"Northern Spain","region_1":"Toro","region_2":"0","variety":"Tinta de Toro","winery":"Bodega Carmen Rodríguez","last_year_points":"88","_deepnote_index_column":"7"},{"country":"US","designation":"Silice","points":"95.0","price":"65.0","province":"Oregon","region_1":"Chehalem 
Mountains","region_2":"Willamette Valley","variety":"Pinot Noir","winery":"Bergström","last_year_points":"83","_deepnote_index_column":"8"},{"country":"US","designation":"Gap's Crown Vineyard","points":"95.0","price":"60.0","province":"California","region_1":"Sonoma Coast","region_2":"Sonoma","variety":"Pinot Noir","winery":"Blue Farm","last_year_points":"83","_deepnote_index_column":"9"}]},"text/plain":"       country                           designation  points  price  \\\n0           US                     Martha's Vineyard    96.0  235.0   \n1        Spain  Carodorum Selección Especial Reserva    96.0  110.0   \n2           US         Special Selected Late Harvest    96.0   90.0   \n3           US                               Reserve    96.0   65.0   \n4       France                            La Brûlade    95.0   66.0   \n...        ...                                   ...     ...    ...   \n144032   Italy                                     0    91.0   20.0   \n144033  France                        Cuvée Prestige    91.0   27.0   \n144034   Italy                         Terre di Dora    91.0   20.0   \n144035  France                       Grand Brut Rosé    90.0   52.0   \n144036   Italy                                     0    90.0   15.0   \n\n                  province           region_1           region_2  \\\n0               California        Napa Valley               Napa   \n1           Northern Spain               Toro                  0   \n2               California     Knights Valley             Sonoma   \n3                   Oregon  Willamette Valley  Willamette Valley   \n4                 Provence             Bandol                  0   \n...                    ...                ...                ...   
\n144032      Southern Italy  Fiano di Avellino                  0   \n144033           Champagne          Champagne                  0   \n144034      Southern Italy  Fiano di Avellino                  0   \n144035           Champagne          Champagne                  0   \n144036  Northeastern Italy         Alto Adige                  0   \n\n                   variety                   winery  last_year_points  \n0       Cabernet Sauvignon                    Heitz                94  \n1            Tinta de Toro  Bodega Carmen Rodríguez                92  \n2          Sauvignon Blanc                 Macauley               100  \n3               Pinot Noir                    Ponzi                94  \n4       Provence red blend     Domaine de la Bégude                94  \n...                    ...                      ...               ...  \n144032         White Blend    Feudi di San Gregorio                84  \n144033     Champagne Blend                H.Germain                83  \n144034         White Blend                Terredora                97  \n144035     Champagne Blend                   Gosset                89  \n144036        Pinot Grigio            Alois Lageder                82  \n\n[144037 rows x 10 columns]","text/html":"<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>country</th>\n      <th>designation</th>\n      <th>points</th>\n      <th>price</th>\n      <th>province</th>\n      <th>region_1</th>\n      <th>region_2</th>\n      <th>variety</th>\n      <th>winery</th>\n      <th>last_year_points</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>US</td>\n      <td>Martha's Vineyard</td>\n      
<td>96.0</td>\n      <td>235.0</td>\n      <td>California</td>\n      <td>Napa Valley</td>\n      <td>Napa</td>\n      <td>Cabernet Sauvignon</td>\n      <td>Heitz</td>\n      <td>94</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>Spain</td>\n      <td>Carodorum Selección Especial Reserva</td>\n      <td>96.0</td>\n      <td>110.0</td>\n      <td>Northern Spain</td>\n      <td>Toro</td>\n      <td>0</td>\n      <td>Tinta de Toro</td>\n      <td>Bodega Carmen Rodríguez</td>\n      <td>92</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>US</td>\n      <td>Special Selected Late Harvest</td>\n      <td>96.0</td>\n      <td>90.0</td>\n      <td>California</td>\n      <td>Knights Valley</td>\n      <td>Sonoma</td>\n      <td>Sauvignon Blanc</td>\n      <td>Macauley</td>\n      <td>100</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>US</td>\n      <td>Reserve</td>\n      <td>96.0</td>\n      <td>65.0</td>\n      <td>Oregon</td>\n      <td>Willamette Valley</td>\n      <td>Willamette Valley</td>\n      <td>Pinot Noir</td>\n      <td>Ponzi</td>\n      <td>94</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>France</td>\n      <td>La Brûlade</td>\n      <td>95.0</td>\n      <td>66.0</td>\n      <td>Provence</td>\n      <td>Bandol</td>\n      <td>0</td>\n      <td>Provence red blend</td>\n      <td>Domaine de la Bégude</td>\n      <td>94</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>144032</th>\n      <td>Italy</td>\n      <td>0</td>\n      <td>91.0</td>\n      <td>20.0</td>\n      <td>Southern Italy</td>\n      <td>Fiano di Avellino</td>\n      <td>0</td>\n      <td>White Blend</td>\n      <td>Feudi di San Gregorio</td>\n      <td>84</td>\n    </tr>\n    <tr>\n      <th>144033</th>\n      <td>France</td>\n      <td>Cuvée 
Prestige</td>\n      <td>91.0</td>\n      <td>27.0</td>\n      <td>Champagne</td>\n      <td>Champagne</td>\n      <td>0</td>\n      <td>Champagne Blend</td>\n      <td>H.Germain</td>\n      <td>83</td>\n    </tr>\n    <tr>\n      <th>144034</th>\n      <td>Italy</td>\n      <td>Terre di Dora</td>\n      <td>91.0</td>\n      <td>20.0</td>\n      <td>Southern Italy</td>\n      <td>Fiano di Avellino</td>\n      <td>0</td>\n      <td>White Blend</td>\n      <td>Terredora</td>\n      <td>97</td>\n    </tr>\n    <tr>\n      <th>144035</th>\n      <td>France</td>\n      <td>Grand Brut Rosé</td>\n      <td>90.0</td>\n      <td>52.0</td>\n      <td>Champagne</td>\n      <td>Champagne</td>\n      <td>0</td>\n      <td>Champagne Blend</td>\n      <td>Gosset</td>\n      <td>89</td>\n    </tr>\n    <tr>\n      <th>144036</th>\n      <td>Italy</td>\n      <td>0</td>\n      <td>90.0</td>\n      <td>15.0</td>\n      <td>Northeastern Italy</td>\n      <td>Alto Adige</td>\n      <td>0</td>\n      <td>Pinot Grigio</td>\n      <td>Alois Lageder</td>\n      <td>82</td>\n    </tr>\n  </tbody>\n</table>\n<p>144037 rows × 10 columns</p>\n</div>"},"metadata":{}}],"execution_count":4},{"cell_type":"code","source":"df = read_data()\ndf.isnull().sum(axis = 0)","metadata":{"tags":[],"cell_id":"4ff24651309843e2927764d50596ae93","source_hash":"81ba9071","execution_start":1666884403275,"execution_millis":1095,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[{"name":"stdout","text":"--------------Before Imputation--------------\ncountry                 2\ndesignation         43826\npoints                  5\nprice               13396\nprovince                7\nregion_1            23845\nregion_2            85659\nvariety                 5\nwinery                  5\nlast_year_points        0\ndtype: int64\n--------------After Imputation--------------\ncountry             0\ndesignation         0\npoints              0\nprice               0\nprovince            
0\nregion_1            0\nregion_2            0\nvariety             0\nwinery              0\nlast_year_points    0\ndtype: int64\n","output_type":"stream"},{"output_type":"execute_result","execution_count":5,"data":{"text/plain":"country             0\ndesignation         0\npoints              0\nprice               0\nprovince            0\nregion_1            0\nregion_2            0\nvariety             0\nwinery              0\nlast_year_points    0\ndtype: int64"},"metadata":{}}],"execution_count":5},{"cell_type":"code","source":"def meanmode_imputation(input_function):\n    @functools.wraps(input_function)\n    def meanmode_imputation_wrapper(*args, **kwargs):\n        return_value = input_function(*args, **kwargs)\n        print(\"--------------Before Mean/Mode Imputation--------------\")\n        print(return_value.isnull().sum(axis = 0))\n        for col in list(return_value.columns):\n            if return_value[col].dtype == float:\n                return_value[col].fillna(return_value[col].mean(), inplace = True)\n            elif return_value[col].dtype.name == 'category':  \n                return_value[col].fillna(return_value[col].mode()[0], inplace = True)\n        print(\"--------------After Mean/Mode Imputation--------------\")\n        print(return_value.isnull().sum(axis = 0))\n        return return_value\n    return meanmode_imputation_wrapper","metadata":{"tags":[],"cell_id":"431f7417c9c04b26939cf1a44b7bee90","allow_embed":"code_output","source_hash":"25e07f36","execution_start":1666884403888,"execution_millis":3,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[],"execution_count":6},{"cell_type":"code","source":"@meanmode_imputation\ndef read_data(data_type_dict):\n    df = pd.read_csv(\"wines_data.csv\", sep = \";\")\n    for col in list(df.columns):\n      df[col] = df[col].astype(data_type_dict[col])\n    return 
df","metadata":{"tags":[],"cell_id":"bb78c01d86c84c10be8e602e78190680","allow_embed":"code_output","source_hash":"642ff961","execution_start":1666884403893,"execution_millis":2,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[],"execution_count":7},{"cell_type":"code","source":" data_type_dict = {'country':'category', 'designation':'category',\n'points':'float', 'price':'float', 'province':'category', 'region_1':'category',\n 'region_2':'category', 'variety':'category', 'winery':'category', 'last_year_points':'float'}\ndf = read_data(data_type_dict)","metadata":{"tags":[],"cell_id":"bcb072ac40b740cc93e6acd1b553090b","allow_embed":"code_output","source_hash":"5ef22636","execution_start":1666884403900,"execution_millis":666,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[{"name":"stdout","text":"--------------Before Mean/Mode Imputation--------------\ncountry                 2\ndesignation         43826\npoints                  5\nprice               13396\nprovince                7\nregion_1            23845\nregion_2            85659\nvariety                 5\nwinery                  5\nlast_year_points        0\ndtype: int64\n--------------After Mean/Mode Imputation--------------\ncountry             0\ndesignation         0\npoints              0\nprice               0\nprovince            0\nregion_1            0\nregion_2            0\nvariety             0\nwinery              0\nlast_year_points    0\ndtype: int64\n","output_type":"stream"}],"execution_count":8},{"cell_type":"code","source":"def iterative_imputation(input_function):\n    @functools.wraps(input_function)\n    def iterative_imputation_wrapper(*args, **kwargs):\n        return_value = input_function(*args, **kwargs)\n        print(\"--------------Before Bayesian Ridge Regression Imputation--------------\")\n        print(return_value.isnull().sum(axis = 0))\n        return_num = return_value[['price', 'points', 'last_year_points']]\n        
return_cat = return_value.drop(columns=['price', 'points', 'last_year_points'])\n\n        imp_bayesian = IterativeImputer(max_iter=10, random_state=0)\n        imp_bayesian.fit(np.array(return_num))\n        return_num = pd.DataFrame(np.round(imp_bayesian.transform(np.array(return_num))), columns = ['price', 'points', 'last_year_points'])\n        for col in list(return_cat.columns):\n            return_cat[col].fillna(return_cat[col].mode()[0], inplace = True)\n        return_value = pd.concat([return_cat, return_num], axis=1)\n        print(\"--------------After Bayesian Ridge Regression Imputation--------------\")\n        print(return_value.isnull().sum(axis = 0))\n        return return_value\n    return iterative_imputation_wrapper","metadata":{"tags":[],"cell_id":"8bcf361def3549a6bed09465d4f64637","allow_embed":"code_output","source_hash":"ccf70f29","execution_start":1666884404568,"execution_millis":19,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[],"execution_count":9},{"cell_type":"code","source":"@iterative_imputation\ndef read_data(data_type_dict):\n    df = pd.read_csv(\"wines_data.csv\", sep = \";\")\n    for col in list(df.columns):\n      df[col] = df[col].astype(data_type_dict[col])\n    return df","metadata":{"tags":[],"cell_id":"3bde52f0e03e4977bb6c5976b9a88b4c","allow_embed":"code_output","source_hash":"f6485403","execution_start":1666884404590,"execution_millis":70578,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[],"execution_count":10},{"cell_type":"code","source":" data_type_dict = {'country':'category', 'designation':'category',\n'points':'float', 'price':'float', 'province':'category', 'region_1':'category',\n 'region_2':'category', 'variety':'category', 'winery':'category', 'last_year_points':'float'}\ndf = 
read_data(data_type_dict)","metadata":{"tags":[],"cell_id":"f725b7390fdd450a98fac67e0993f450","allow_embed":"code_output","source_hash":"5ef22636","execution_start":1666884404600,"execution_millis":1697,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[{"name":"stdout","text":"--------------Before Bayesian Ridge Regression Imputation--------------\ncountry                 2\ndesignation         43826\npoints                  5\nprice               13396\nprovince                7\nregion_1            23845\nregion_2            85659\nvariety                 5\nwinery                  5\nlast_year_points        0\ndtype: int64\n--------------After Bayesian Ridge Regression Imputation--------------\ncountry             0\ndesignation         0\nprovince            0\nregion_1            0\nregion_2            0\nvariety             0\nwinery              0\nprice               0\npoints              0\nlast_year_points    0\ndtype: int64\n","output_type":"stream"}],"execution_count":11},{"cell_type":"code","source":" data_type_dict = {'country':'category', 'designation':'category',\n'points':'float', 'price':'float', 'province':'category', 'region_1':'category',\n 'region_2':'category', 'variety':'category', 'winery':'category', 'last_year_points':'float'}\ndf_original = pd.read_csv(\"wines_data.csv\", sep = \";\")\ndf_imp = read_data(data_type_dict)\n","metadata":{"tags":[],"cell_id":"0f9c67b50e6c45799beb6125b7ff597d","source_hash":"b3caafb9","execution_start":1666884406300,"execution_millis":1982,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[{"name":"stdout","text":"--------------Before Bayesian Ridge Regression Imputation--------------\ncountry                 2\ndesignation         43826\npoints                  5\nprice               13396\nprovince                7\nregion_1            23845\nregion_2            85659\nvariety                 5\nwinery                  5\nlast_year_points        0\ndtype: 
int64\n--------------After Bayesian Ridge Regression Imputation--------------\ncountry             0\ndesignation         0\nprovince            0\nregion_1            0\nregion_2            0\nvariety             0\nwinery              0\nprice               0\npoints              0\nlast_year_points    0\ndtype: int64\n","output_type":"stream"}],"execution_count":12},{"cell_type":"markdown","source":"<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=e827d9a5-6b96-4401-8c9c-6ec5b955d54f' target=\"_blank\">\n<img alt='Created in deepnote.com' style='display:inline;max-height:16px;margin:0px;margin-right:7.5px;' src='data:image/svg+xml;base64,PD94bWwgdmVyc2lvbj0iMS4wIiBlbmNvZGluZz0iVVRGLTgiPz4KPHN2ZyB3aWR0aD0iODBweCIgaGVpZ2h0PSI4MHB4IiB2aWV3Qm94PSIwIDAgODAgODAiIHZlcnNpb249IjEuMSIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIiB4bWxuczp4bGluaz0iaHR0cDovL3d3dy53My5vcmcvMTk5OS94bGluayI+CiAgICA8IS0tIEdlbmVyYXRvcjogU2tldGNoIDU0LjEgKDc2NDkwKSAtIGh0dHBzOi8vc2tldGNoYXBwLmNvbSAtLT4KICAgIDx0aXRsZT5Hcm91cCAzPC90aXRsZT4KICAgIDxkZXNjPkNyZWF0ZWQgd2l0aCBTa2V0Y2guPC9kZXNjPgogICAgPGcgaWQ9IkxhbmRpbmciIHN0cm9rZT0ibm9uZSIgc3Ryb2tlLXdpZHRoPSIxIiBmaWxsPSJub25lIiBmaWxsLXJ1bGU9ImV2ZW5vZGQiPgogICAgICAgIDxnIGlkPSJBcnRib2FyZCIgdHJhbnNmb3JtPSJ0cmFuc2xhdGUoLTEyMzUuMDAwMDAwLCAtNzkuMDAwMDAwKSI+CiAgICAgICAgICAgIDxnIGlkPSJHcm91cC0zIiB0cmFuc2Zvcm09InRyYW5zbGF0ZSgxMjM1LjAwMDAwMCwgNzkuMDAwMDAwKSI+CiAgICAgICAgICAgICAgICA8cG9seWdvbiBpZD0iUGF0aC0yMCIgZmlsbD0iIzAyNjVCNCIgcG9pbnRzPSIyLjM3NjIzNzYyIDgwIDM4LjA0NzY2NjcgODAgNTcuODIxNzgyMiA3My44MDU3NTkyIDU3LjgyMTc4MjIgMzIuNzU5MjczOSAzOS4xNDAyMjc4IDMxLjY4MzE2ODMiPjwvcG9seWdvbj4KICAgICAgICAgICAgICAgIDxwYXRoIGQ9Ik0zNS4wMDc3MTgsODAgQzQyLjkwNjIwMDcsNzYuNDU0OTM1OCA0Ny41NjQ5MTY3LDcxLjU0MjI2NzEgNDguOTgzODY2LDY1LjI2MTk5MzkgQzUxLjExMjI4OTksNTUuODQxNTg0MiA0MS42NzcxNzk1LDQ5LjIxMjIyODQgMjUuNjIzOTg0Niw0OS4yMTIyMjg0IEMyNS40ODQ5Mjg5LDQ5LjEyNjg0NDggM
jkuODI2MTI5Niw0My4yODM4MjQ4IDM4LjY0NzU4NjksMzEuNjgzMTY4MyBMNzIuODcxMjg3MSwzMi41NTQ0MjUgTDY1LjI4MDk3Myw2Ny42NzYzNDIxIEw1MS4xMTIyODk5LDc3LjM3NjE0NCBMMzUuMDA3NzE4LDgwIFoiIGlkPSJQYXRoLTIyIiBmaWxsPSIjMDAyODY4Ij48L3BhdGg+CiAgICAgICAgICAgICAgICA8cGF0aCBkPSJNMCwzNy43MzA0NDA1IEwyNy4xMTQ1MzcsMC4yNTcxMTE0MzYgQzYyLjM3MTUxMjMsLTEuOTkwNzE3MDEgODAsMTAuNTAwMzkyNyA4MCwzNy43MzA0NDA1IEM4MCw2NC45NjA0ODgyIDY0Ljc3NjUwMzgsNzkuMDUwMzQxNCAzNC4zMjk1MTEzLDgwIEM0Ny4wNTUzNDg5LDc3LjU2NzA4MDggNTMuNDE4MjY3Nyw3MC4zMTM2MTAzIDUzLjQxODI2NzcsNTguMjM5NTg4NSBDNTMuNDE4MjY3Nyw0MC4xMjg1NTU3IDM2LjMwMzk1NDQsMzcuNzMwNDQwNSAyNS4yMjc0MTcsMzcuNzMwNDQwNSBDMTcuODQzMDU4NiwzNy43MzA0NDA1IDkuNDMzOTE5NjYsMzcuNzMwNDQwNSAwLDM3LjczMDQ0MDUgWiIgaWQ9IlBhdGgtMTkiIGZpbGw9IiMzNzkzRUYiPjwvcGF0aD4KICAgICAgICAgICAgPC9nPgogICAgICAgIDwvZz4KICAgIDwvZz4KPC9zdmc+' > </img>\nCreated in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>","metadata":{"tags":[],"created_in_deepnote_cell":true,"deepnote_cell_type":"markdown"}}],"nbformat":4,"nbformat_minor":0,"metadata":{"deepnote":{},"orig_nbformat":2,"deepnote_notebook_id":"d50ab12ed7b646ee9d6d24c80167f73f","deepnote_execution_queue":[]}}


--------------------------------------------------------------------------------
/list_comp_generators.ipynb:
--------------------------------------------------------------------------------
1 | {"cells":[{"cell_type":"code","source":"import pandas as pd \n\ndf = pd.read_csv(\"insurance.csv\")","metadata":{"tags":[],"cell_id":"720286d65de54a80a00c3a52f25e5169","allow_embed":"code_output","source_hash":"d97cb9c5","execution_start":1671471973316,"execution_millis":1,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[],"execution_count":1},{"cell_type":"code","source":"df.head()","metadata":{"tags":[],"cell_id":"525400f95b4a47dca0943addeb411a79","allow_embed":"code_output","source_hash":"c085b6ba","execution_start":1671471973320,"execution_millis":33,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","execution_count":2,"data":{"application/vnd.deepnote.dataframe.v3+json":{"column_count":7,"row_count":5,"columns":[{"name":"age","dtype":"int64","stats":{"unique_count":5,"nan_count":0,"min":"18","max":"33","histogram":[{"bin_start":18,"bin_end":19.5,"count":2},{"bin_start":19.5,"bin_end":21,"count":0},{"bin_start":21,"bin_end":22.5,"count":0},{"bin_start":22.5,"bin_end":24,"count":0},{"bin_start":24,"bin_end":25.5,"count":0},{"bin_start":25.5,"bin_end":27,"count":0},{"bin_start":27,"bin_end":28.5,"count":1},{"bin_start":28.5,"bin_end":30,"count":0},{"bin_start":30,"bin_end":31.5,"count":0},{"bin_start":31.5,"bin_end":33,"count":2}]}},{"name":"sex","dtype":"object","stats":{"unique_count":2,"nan_count":0,"categories":[{"name":"male","count":4},{"name":"female","count":1}]}},{"name":"bmi","dtype":"float64","stats":{"unique_count":5,"nan_count":0,"min":"22.705","max":"33.77","histogram":[{"bin_start":22.705,"bin_end":23.8115,"count":1},{"bin_start":23.8115,"bin_end":24.918,"count":0},{"bin_start":24.918,"bin_end":26.0245,"count":0},{"bin_start":26.0245,"bin_end":27.131,"count":0},{"bin_start":27.131,"bin_end":28.2375,"count":1},{"bin_start":28.2375,"bin_end":29.344,"count":1},{"bin_start":29.344,"bin_end":30.4505,"count":0},{"bin_start":30.4505,"bin_end":31.557000000000002,"count":0},
{"bin_start":31.557000000000002,"bin_end":32.6635,"count":0},{"bin_start":32.6635,"bin_end":33.77,"count":2}]}},{"name":"children","dtype":"int64","stats":{"unique_count":3,"nan_count":0,"min":"0","max":"3","histogram":[{"bin_start":0,"bin_end":0.3,"count":3},{"bin_start":0.3,"bin_end":0.6,"count":0},{"bin_start":0.6,"bin_end":0.8999999999999999,"count":0},{"bin_start":0.8999999999999999,"bin_end":1.2,"count":1},{"bin_start":1.2,"bin_end":1.5,"count":0},{"bin_start":1.5,"bin_end":1.7999999999999998,"count":0},{"bin_start":1.7999999999999998,"bin_end":2.1,"count":0},{"bin_start":2.1,"bin_end":2.4,"count":0},{"bin_start":2.4,"bin_end":2.6999999999999997,"count":0},{"bin_start":2.6999999999999997,"bin_end":3,"count":1}]}},{"name":"smoker","dtype":"object","stats":{"unique_count":2,"nan_count":0,"categories":[{"name":"no","count":4},{"name":"yes","count":1}]}},{"name":"region","dtype":"object","stats":{"unique_count":3,"nan_count":0,"categories":[{"name":"southeast","count":2},{"name":"northwest","count":2},{"name":"southwest","count":1}]}},{"name":"charges","dtype":"float64","stats":{"unique_count":5,"nan_count":0,"min":"1725.5523","max":"21984.47061","histogram":[{"bin_start":1725.5523,"bin_end":3751.444131,"count":1},{"bin_start":3751.444131,"bin_end":5777.335962,"count":2},{"bin_start":5777.335962,"bin_end":7803.227793000001,"count":0},{"bin_start":7803.227793000001,"bin_end":9829.119624,"count":0},{"bin_start":9829.119624,"bin_end":11855.011455,"count":0},{"bin_start":11855.011455,"bin_end":13880.903286,"count":0},{"bin_start":13880.903286,"bin_end":15906.795117,"count":0},{"bin_start":15906.795117,"bin_end":17932.686948000002,"count":1},{"bin_start":17932.686948000002,"bin_end":19958.578779,"count":0},{"bin_start":19958.578779,"bin_end":21984.47061,"count":1}]}},{"name":"_deepnote_index_column","dtype":"int64"}],"rows":[{"age":"19","sex":"female","bmi":"27.9","children":"0","smoker":"yes","region":"southwest","charges":"16884.924","_deepnote_index_column":"0"},{"a
ge":"18","sex":"male","bmi":"33.77","children":"1","smoker":"no","region":"southeast","charges":"1725.5523","_deepnote_index_column":"1"},{"age":"28","sex":"male","bmi":"33.0","children":"3","smoker":"no","region":"southeast","charges":"4449.462","_deepnote_index_column":"2"},{"age":"33","sex":"male","bmi":"22.705","children":"0","smoker":"no","region":"northwest","charges":"21984.47061","_deepnote_index_column":"3"},{"age":"32","sex":"male","bmi":"28.88","children":"0","smoker":"no","region":"northwest","charges":"3866.8552","_deepnote_index_column":"4"}]},"text/plain":"   age     sex     bmi  children smoker     region      charges\n0   19  female  27.900         0    yes  southwest  16884.92400\n1   18    male  33.770         1     no  southeast   1725.55230\n2   28    male  33.000         3     no  southeast   4449.46200\n3   33    male  22.705         0     no  northwest  21984.47061\n4   32    male  28.880         0     no  northwest   3866.85520","text/html":"<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>age</th>\n      <th>sex</th>\n      <th>bmi</th>\n      <th>children</th>\n      <th>smoker</th>\n      <th>region</th>\n      <th>charges</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>19</td>\n      <td>female</td>\n      <td>27.900</td>\n      <td>0</td>\n      <td>yes</td>\n      <td>southwest</td>\n      <td>16884.92400</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>18</td>\n      <td>male</td>\n      <td>33.770</td>\n      <td>1</td>\n      <td>no</td>\n      <td>southeast</td>\n      <td>1725.55230</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>28</td>\n      <td>male</td>\n      
<td>33.000</td>\n      <td>3</td>\n      <td>no</td>\n      <td>southeast</td>\n      <td>4449.46200</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>33</td>\n      <td>male</td>\n      <td>22.705</td>\n      <td>0</td>\n      <td>no</td>\n      <td>northwest</td>\n      <td>21984.47061</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>32</td>\n      <td>male</td>\n      <td>28.880</td>\n      <td>0</td>\n      <td>no</td>\n      <td>northwest</td>\n      <td>3866.85520</td>\n    </tr>\n  </tbody>\n</table>\n</div>"},"metadata":{}}],"execution_count":2},{"cell_type":"code","source":"import numpy as np \ndef log_transform(input_list):\n    return np.log(input_list)","metadata":{"tags":[],"cell_id":"ca49d07ef75d454bbac460fcc5207eb4","allow_embed":"code_output","source_hash":"10f8e841","execution_start":1671471973358,"execution_millis":2,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[],"execution_count":3},{"cell_type":"code","source":"bmi_list = list(df['bmi'])\nbmi_lt_map = list(map(log_transform, bmi_list))\ndf['bmi_lt_map'] = bmi_lt_map","metadata":{"tags":[],"cell_id":"6b98a85dc6fa415f9c96b4a76f7ca127","allow_embed":"code_output","source_hash":"9ab65662","execution_start":1671471973402,"execution_millis":0,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[],"execution_count":4},{"cell_type":"code","source":"df['bmi_lt_listcomp'] = [np.log(bmi) for bmi in 
list(df['bmi'])]","metadata":{"tags":[],"cell_id":"b533e1b8442a4d1fbc347dd7bf162e6c","allow_embed":"code_output","source_hash":"3fe3a273","execution_start":1671471973402,"execution_millis":0,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[],"execution_count":5},{"cell_type":"code","source":"df.head()","metadata":{"tags":[],"cell_id":"08a80579a5844de0bd619fbe1edba061","source_hash":"c085b6ba","execution_start":1671471973403,"execution_millis":3,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","execution_count":6,"data":{"application/vnd.deepnote.dataframe.v3+json":{"column_count":9,"row_count":5,"columns":[{"name":"age","dtype":"int64","stats":{"unique_count":5,"nan_count":0,"min":"18","max":"33","histogram":[{"bin_start":18,"bin_end":19.5,"count":2},{"bin_start":19.5,"bin_end":21,"count":0},{"bin_start":21,"bin_end":22.5,"count":0},{"bin_start":22.5,"bin_end":24,"count":0},{"bin_start":24,"bin_end":25.5,"count":0},{"bin_start":25.5,"bin_end":27,"count":0},{"bin_start":27,"bin_end":28.5,"count":1},{"bin_start":28.5,"bin_end":30,"count":0},{"bin_start":30,"bin_end":31.5,"count":0},{"bin_start":31.5,"bin_end":33,"count":2}]}},{"name":"sex","dtype":"object","stats":{"unique_count":2,"nan_count":0,"categories":[{"name":"male","count":4},{"name":"female","count":1}]}},{"name":"bmi","dtype":"float64","stats":{"unique_count":5,"nan_count":0,"min":"22.705","max":"33.77","histogram":[{"bin_start":22.705,"bin_end":23.8115,"count":1},{"bin_start":23.8115,"bin_end":24.918,"count":0},{"bin_start":24.918,"bin_end":26.0245,"count":0},{"bin_start":26.0245,"bin_end":27.131,"count":0},{"bin_start":27.131,"bin_end":28.2375,"count":1},{"bin_start":28.2375,"bin_end":29.344,"count":1},{"bin_start":29.344,"bin_end":30.4505,"count":0},{"bin_start":30.4505,"bin_end":31.557000000000002,"count":0},{"bin_start":31.557000000000002,"bin_end":32.6635,"count":0},{"bin_start":32.6635,"bin_end":33.77,"count":2}]}},{"na
me":"children","dtype":"int64","stats":{"unique_count":3,"nan_count":0,"min":"0","max":"3","histogram":[{"bin_start":0,"bin_end":0.3,"count":3},{"bin_start":0.3,"bin_end":0.6,"count":0},{"bin_start":0.6,"bin_end":0.8999999999999999,"count":0},{"bin_start":0.8999999999999999,"bin_end":1.2,"count":1},{"bin_start":1.2,"bin_end":1.5,"count":0},{"bin_start":1.5,"bin_end":1.7999999999999998,"count":0},{"bin_start":1.7999999999999998,"bin_end":2.1,"count":0},{"bin_start":2.1,"bin_end":2.4,"count":0},{"bin_start":2.4,"bin_end":2.6999999999999997,"count":0},{"bin_start":2.6999999999999997,"bin_end":3,"count":1}]}},{"name":"smoker","dtype":"object","stats":{"unique_count":2,"nan_count":0,"categories":[{"name":"no","count":4},{"name":"yes","count":1}]}},{"name":"region","dtype":"object","stats":{"unique_count":3,"nan_count":0,"categories":[{"name":"southeast","count":2},{"name":"northwest","count":2},{"name":"southwest","count":1}]}},{"name":"charges","dtype":"float64","stats":{"unique_count":5,"nan_count":0,"min":"1725.5523","max":"21984.47061","histogram":[{"bin_start":1725.5523,"bin_end":3751.444131,"count":1},{"bin_start":3751.444131,"bin_end":5777.335962,"count":2},{"bin_start":5777.335962,"bin_end":7803.227793000001,"count":0},{"bin_start":7803.227793000001,"bin_end":9829.119624,"count":0},{"bin_start":9829.119624,"bin_end":11855.011455,"count":0},{"bin_start":11855.011455,"bin_end":13880.903286,"count":0},{"bin_start":13880.903286,"bin_end":15906.795117,"count":0},{"bin_start":15906.795117,"bin_end":17932.686948000002,"count":1},{"bin_start":17932.686948000002,"bin_end":19958.578779,"count":0},{"bin_start":19958.578779,"bin_end":21984.47061,"count":1}]}},{"name":"bmi_lt_map","dtype":"float64","stats":{"unique_count":5,"nan_count":0,"min":"3.122585164549914","max":"3.519572834397476","histogram":[{"bin_start":3.122585164549914,"bin_end":3.1622839315346702,"count":1},{"bin_start":3.1622839315346702,"bin_end":3.2019826985194264,"count":0},{"bin_start":3.2019826985194264,"b
in_end":3.2416814655041826,"count":0},{"bin_start":3.2416814655041826,"bin_end":3.2813802324889387,"count":0},{"bin_start":3.2813802324889387,"bin_end":3.321078999473695,"count":0},{"bin_start":3.321078999473695,"bin_end":3.3607777664584515,"count":1},{"bin_start":3.3607777664584515,"bin_end":3.4004765334432077,"count":1},{"bin_start":3.4004765334432077,"bin_end":3.440175300427964,"count":0},{"bin_start":3.440175300427964,"bin_end":3.47987406741272,"count":0},{"bin_start":3.47987406741272,"bin_end":3.519572834397476,"count":2}]}},{"name":"bmi_lt_listcomp","dtype":"float64","stats":{"unique_count":5,"nan_count":0,"min":"3.122585164549914","max":"3.519572834397476","histogram":[{"bin_start":3.122585164549914,"bin_end":3.1622839315346702,"count":1},{"bin_start":3.1622839315346702,"bin_end":3.2019826985194264,"count":0},{"bin_start":3.2019826985194264,"bin_end":3.2416814655041826,"count":0},{"bin_start":3.2416814655041826,"bin_end":3.2813802324889387,"count":0},{"bin_start":3.2813802324889387,"bin_end":3.321078999473695,"count":0},{"bin_start":3.321078999473695,"bin_end":3.3607777664584515,"count":1},{"bin_start":3.3607777664584515,"bin_end":3.4004765334432077,"count":1},{"bin_start":3.4004765334432077,"bin_end":3.440175300427964,"count":0},{"bin_start":3.440175300427964,"bin_end":3.47987406741272,"count":0},{"bin_start":3.47987406741272,"bin_end":3.519572834397476,"count":2}]}},{"name":"_deepnote_index_column","dtype":"int64"}],"rows":[{"age":"19","sex":"female","bmi":"27.9","children":"0","smoker":"yes","region":"southwest","charges":"16884.924","bmi_lt_map":"3.32862668882732","bmi_lt_listcomp":"3.32862668882732","_deepnote_index_column":"0"},{"age":"18","sex":"male","bmi":"33.77","children":"1","smoker":"no","region":"southeast","charges":"1725.5523","bmi_lt_map":"3.519572834397476","bmi_lt_listcomp":"3.519572834397476","_deepnote_index_column":"1"},{"age":"28","sex":"male","bmi":"33.0","children":"3","smoker":"no","region":"southeast","charges":"4449.462","bmi_lt_ma
p":"3.4965075614664802","bmi_lt_listcomp":"3.4965075614664802","_deepnote_index_column":"2"},{"age":"33","sex":"male","bmi":"22.705","children":"0","smoker":"no","region":"northwest","charges":"21984.47061","bmi_lt_map":"3.122585164549914","bmi_lt_listcomp":"3.122585164549914","_deepnote_index_column":"3"},{"age":"32","sex":"male","bmi":"28.88","children":"0","smoker":"no","region":"northwest","charges":"3866.8552","bmi_lt_map":"3.3631493140246254","bmi_lt_listcomp":"3.3631493140246254","_deepnote_index_column":"4"}]},"text/plain":"   age     sex     bmi  children smoker     region      charges  bmi_lt_map  \\\n0   19  female  27.900         0    yes  southwest  16884.92400    3.328627   \n1   18    male  33.770         1     no  southeast   1725.55230    3.519573   \n2   28    male  33.000         3     no  southeast   4449.46200    3.496508   \n3   33    male  22.705         0     no  northwest  21984.47061    3.122585   \n4   32    male  28.880         0     no  northwest   3866.85520    3.363149   \n\n   bmi_lt_listcomp  \n0         3.328627  \n1         3.519573  \n2         3.496508  \n3         3.122585  \n4         3.363149  ","text/html":"<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>age</th>\n      <th>sex</th>\n      <th>bmi</th>\n      <th>children</th>\n      <th>smoker</th>\n      <th>region</th>\n      <th>charges</th>\n      <th>bmi_lt_map</th>\n      <th>bmi_lt_listcomp</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>19</td>\n      <td>female</td>\n      <td>27.900</td>\n      <td>0</td>\n      <td>yes</td>\n      <td>southwest</td>\n      <td>16884.92400</td>\n      <td>3.328627</td>\n      
<td>3.328627</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>18</td>\n      <td>male</td>\n      <td>33.770</td>\n      <td>1</td>\n      <td>no</td>\n      <td>southeast</td>\n      <td>1725.55230</td>\n      <td>3.519573</td>\n      <td>3.519573</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>28</td>\n      <td>male</td>\n      <td>33.000</td>\n      <td>3</td>\n      <td>no</td>\n      <td>southeast</td>\n      <td>4449.46200</td>\n      <td>3.496508</td>\n      <td>3.496508</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>33</td>\n      <td>male</td>\n      <td>22.705</td>\n      <td>0</td>\n      <td>no</td>\n      <td>northwest</td>\n      <td>21984.47061</td>\n      <td>3.122585</td>\n      <td>3.122585</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>32</td>\n      <td>male</td>\n      <td>28.880</td>\n      <td>0</td>\n      <td>no</td>\n      <td>northwest</td>\n      <td>3866.85520</td>\n      <td>3.363149</td>\n      <td>3.363149</td>\n    </tr>\n  </tbody>\n</table>\n</div>"},"metadata":{}}],"execution_count":6},{"cell_type":"code","source":"df['bmi_lt_direct'] = np.log(df['bmi'])","metadata":{"tags":[],"cell_id":"c308658a91064fd8a1c79f5a8a2f0d43","allow_embed":"code_output","source_hash":"b203a072","execution_start":1671471973406,"execution_millis":2,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[],"execution_count":7},{"cell_type":"code","source":"my_predictions = [[0.5, 0.2, 0.8], [0.3, 0.1, 0.9], [1.0, 0.2, 0.7]]","metadata":{"tags":[],"cell_id":"2e35b2e354f243db9cf9a71896de6186","allow_embed":"code_output","source_hash":"a9e90b01","execution_start":1671471973410,"execution_millis":1,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[],"execution_count":8},{"cell_type":"code","source":"flattened_predictions = [prob for row in my_predictions for prob in 
row]\nprint(flattened_predictions)","metadata":{"tags":[],"cell_id":"5864af64293a495fbd99b231f17c378f","allow_embed":"code_output","source_hash":"e5e060d5","execution_start":1671471973413,"execution_millis":9,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[{"name":"stdout","text":"[0.5, 0.2, 0.8, 0.3, 0.1, 0.9, 1.0, 0.2, 0.7]\n","output_type":"stream"}],"execution_count":9},{"cell_type":"code","source":"flat_fl = []\nfor row in my_predictions:\n    for prob in row:\n        flat_fl.append(prob)\nprint(flat_fl)","metadata":{"tags":[],"cell_id":"809181cccc09408f9f490d70fa8bc603","allow_embed":"code_output","source_hash":"5fa68ca4","execution_start":1671471973458,"execution_millis":4,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[{"name":"stdout","text":"[0.5, 0.2, 0.8, 0.3, 0.1, 0.9, 1.0, 0.2, 0.7]\n","output_type":"stream"}],"execution_count":10},{"cell_type":"code","source":"ml_labels = [['Yes' if prob >= 0.8  else 'Maybe' if (prob > 0.5 and prob < 0.8) else 'No' for prob in row] for row in my_predictions]\nprint(my_predictions)\nprint(ml_labels)","metadata":{"tags":[],"cell_id":"2f3136c0e4df48a09b092cc4adb3692c","allow_embed":"code_output","source_hash":"f3e6c582","execution_start":1671471973459,"execution_millis":3,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[{"name":"stdout","text":"[[0.5, 0.2, 0.8], [0.3, 0.1, 0.9], [1.0, 0.2, 0.7]]\n[['No', 'No', 'Yes'], ['No', 'No', 'Yes'], ['Yes', 'No', 'Maybe']]\n","output_type":"stream"}],"execution_count":11},{"cell_type":"code","source":"fl_labels = []\n\nfor row in my_predictions:\n    hold_list = []\n    for prob in row:\n        if prob >= 0.8:\n            hold_list.append('Yes')\n        elif (prob > 0.5 and prob < 0.8):\n            hold_list.append('Maybe')\n        else:\n            hold_list.append('No')\n    
fl_labels.append(hold_list)\n\nprint(my_predictions)\nprint(fl_labels)\n\n","metadata":{"tags":[],"cell_id":"81cc389958484fc7840fc640f49085a5","allow_embed":"code_output","source_hash":"281fe39d","execution_start":1671471973460,"execution_millis":2,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[{"name":"stdout","text":"[[0.5, 0.2, 0.8], [0.3, 0.1, 0.9], [1.0, 0.2, 0.7]]\n[['No', 'No', 'Yes'], ['No', 'No', 'Yes'], ['Yes', 'No', 'Maybe']]\n","output_type":"stream"}],"execution_count":12},{"cell_type":"code","source":"import numpy as np \nmu, sigma = 0.5, 0.1\nnp.random.seed(42)\nprobs = np.random.normal(mu, sigma, 100000000)\nprint(probs[:10])","metadata":{"tags":[],"cell_id":"1b7f647b0e47402993e5f049baa13315","allow_embed":"code_output","source_hash":"3befc574","execution_start":1671471973462,"execution_millis":3630,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[{"name":"stdout","text":"[0.54967142 0.48617357 0.56476885 0.65230299 0.47658466 0.4765863\n 0.65792128 0.57674347 0.45305256 0.554256  ]\n","output_type":"stream"}],"execution_count":13},{"cell_type":"code","source":"prob_labels = ['Yes' if prob > 0.5 else 'No' for prob in probs]","metadata":{"tags":[],"cell_id":"2fc6ac4f380c4e47890a26a04b2a6475","allow_embed":"code_output","source_hash":"badae281","execution_start":1671471977131,"execution_millis":7784,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[],"execution_count":14},{"cell_type":"code","source":"print(prob_labels[:10])","metadata":{"tags":[],"cell_id":"47dfdb82f82d42cea835539bcd12d70b","allow_embed":"code_output","source_hash":"80cbc92d","execution_start":1671471984918,"execution_millis":2,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[{"name":"stdout","text":"['Yes', 'No', 'Yes', 'Yes', 'No', 'No', 'Yes', 'Yes', 'No', 'Yes']\n","output_type":"stream"}],"execution_count":15},{"cell_type":"code","source":"prob_generator = ('Yes' if prob > 0.5 
else 'No' for prob in probs)\nprobs_sublist = []\nfor i in range(0, 11):\n    probs_sublist.append(next(prob_generator))\nprint(probs_sublist)","metadata":{"tags":[],"cell_id":"0b5a059b7efa49ea8b3d8d15f1f76639","allow_embed":"code_output","source_hash":"2934b395","execution_start":1671472059158,"execution_millis":5,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[{"name":"stdout","text":"['Yes', 'No', 'Yes', 'Yes', 'No', 'No', 'Yes', 'Yes', 'No', 'Yes', 'No']\n","output_type":"stream"}],"execution_count":18},{"cell_type":"markdown","source":"<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=d10c461a-6172-4dc9-a3d6-4cef5899ba0d' target=\"_blank\">\n<img alt='Created in deepnote.com' style='display:inline;max-height:16px;margin:0px;margin-right:7.5px;' src='data:image/svg+xml;base64,PD94bWwgdmVyc2lvbj0iMS4wIiBlbmNvZGluZz0iVVRGLTgiPz4KPHN2ZyB3aWR0aD0iODBweCIgaGVpZ2h0PSI4MHB4IiB2aWV3Qm94PSIwIDAgODAgODAiIHZlcnNpb249IjEuMSIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIiB4bWxuczp4bGluaz0iaHR0cDovL3d3dy53My5vcmcvMTk5OS94bGluayI+CiAgICA8IS0tIEdlbmVyYXRvcjogU2tldGNoIDU0LjEgKDc2NDkwKSAtIGh0dHBzOi8vc2tldGNoYXBwLmNvbSAtLT4KICAgIDx0aXRsZT5Hcm91cCAzPC90aXRsZT4KICAgIDxkZXNjPkNyZWF0ZWQgd2l0aCBTa2V0Y2guPC9kZXNjPgogICAgPGcgaWQ9IkxhbmRpbmciIHN0cm9rZT0ibm9uZSIgc3Ryb2tlLXdpZHRoPSIxIiBmaWxsPSJub25lIiBmaWxsLXJ1bGU9ImV2ZW5vZGQiPgogICAgICAgIDxnIGlkPSJBcnRib2FyZCIgdHJhbnNmb3JtPSJ0cmFuc2xhdGUoLTEyMzUuMDAwMDAwLCAtNzkuMDAwMDAwKSI+CiAgICAgICAgICAgIDxnIGlkPSJHcm91cC0zIiB0cmFuc2Zvcm09InRyYW5zbGF0ZSgxMjM1LjAwMDAwMCwgNzkuMDAwMDAwKSI+CiAgICAgICAgICAgICAgICA8cG9seWdvbiBpZD0iUGF0aC0yMCIgZmlsbD0iIzAyNjVCNCIgcG9pbnRzPSIyLjM3NjIzNzYyIDgwIDM4LjA0NzY2NjcgODAgNTcuODIxNzgyMiA3My44MDU3NTkyIDU3LjgyMTc4MjIgMzIuNzU5MjczOSAzOS4xNDAyMjc4IDMxLjY4MzE2ODMiPjwvcG9seWdvbj4KICAgICAgICAgICAgICAgIDxwYXRoIGQ9Ik0zNS4wMDc3MTgsODAgQzQyLjkwNjIwMDcsNzYuNDU0OTM1OCA0Ny41NjQ5MT
Y3LDcxLjU0MjI2NzEgNDguOTgzODY2LDY1LjI2MTk5MzkgQzUxLjExMjI4OTksNTUuODQxNTg0MiA0MS42NzcxNzk1LDQ5LjIxMjIyODQgMjUuNjIzOTg0Niw0OS4yMTIyMjg0IEMyNS40ODQ5Mjg5LDQ5LjEyNjg0NDggMjkuODI2MTI5Niw0My4yODM4MjQ4IDM4LjY0NzU4NjksMzEuNjgzMTY4MyBMNzIuODcxMjg3MSwzMi41NTQ0MjUgTDY1LjI4MDk3Myw2Ny42NzYzNDIxIEw1MS4xMTIyODk5LDc3LjM3NjE0NCBMMzUuMDA3NzE4LDgwIFoiIGlkPSJQYXRoLTIyIiBmaWxsPSIjMDAyODY4Ij48L3BhdGg+CiAgICAgICAgICAgICAgICA8cGF0aCBkPSJNMCwzNy43MzA0NDA1IEwyNy4xMTQ1MzcsMC4yNTcxMTE0MzYgQzYyLjM3MTUxMjMsLTEuOTkwNzE3MDEgODAsMTAuNTAwMzkyNyA4MCwzNy43MzA0NDA1IEM4MCw2NC45NjA0ODgyIDY0Ljc3NjUwMzgsNzkuMDUwMzQxNCAzNC4zMjk1MTEzLDgwIEM0Ny4wNTUzNDg5LDc3LjU2NzA4MDggNTMuNDE4MjY3Nyw3MC4zMTM2MTAzIDUzLjQxODI2NzcsNTguMjM5NTg4NSBDNTMuNDE4MjY3Nyw0MC4xMjg1NTU3IDM2LjMwMzk1NDQsMzcuNzMwNDQwNSAyNS4yMjc0MTcsMzcuNzMwNDQwNSBDMTcuODQzMDU4NiwzNy43MzA0NDA1IDkuNDMzOTE5NjYsMzcuNzMwNDQwNSAwLDM3LjczMDQ0MDUgWiIgaWQ9IlBhdGgtMTkiIGZpbGw9IiMzNzkzRUYiPjwvcGF0aD4KICAgICAgICAgICAgPC9nPgogICAgICAgIDwvZz4KICAgIDwvZz4KPC9zdmc+' > </img>\nCreated in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>","metadata":{"tags":[],"created_in_deepnote_cell":true,"deepnote_cell_type":"markdown"}}],"nbformat":4,"nbformat_minor":0,"metadata":{"deepnote":{},"orig_nbformat":2,"deepnote_notebook_id":"329863938022481ba67e3c24b20c36dc","deepnote_persisted_session":{"createdAt":"2022-12-14T20:03:02.675Z"},"deepnote_execution_queue":[]}}


--------------------------------------------------------------------------------
/p_values_ml.ipynb:
--------------------------------------------------------------------------------
1 | {"cells":[{"cell_type":"code","source":"%pip install statsmodels\n%pip install mlxtend","metadata":{"tags":[],"cell_id":"32206ddb84a04303a33c22833739f36b","allow_embed":"code_output","source_hash":"23dc76b8","execution_start":1672948325052,"execution_millis":6397,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[{"name":"stdout","text":"Collecting statsmodels\n  Using cached statsmodels-0.13.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (9.9 MB)\nRequirement already satisfied: scipy>=1.3 in /shared-libs/python3.9/py/lib/python3.9/site-packages (from statsmodels) (1.9.3)\nRequirement already satisfied: packaging>=21.3 in /shared-libs/python3.9/py-core/lib/python3.9/site-packages (from statsmodels) (21.3)\nRequirement already satisfied: pandas>=0.25 in /shared-libs/python3.9/py/lib/python3.9/site-packages (from statsmodels) (1.2.5)\nRequirement already satisfied: numpy>=1.17 in /shared-libs/python3.9/py/lib/python3.9/site-packages (from statsmodels) (1.23.4)\nCollecting patsy>=0.5.2\n  Using cached patsy-0.5.3-py2.py3-none-any.whl (233 kB)\nRequirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /shared-libs/python3.9/py-core/lib/python3.9/site-packages (from packaging>=21.3->statsmodels) (3.0.9)\nRequirement already satisfied: pytz>=2017.3 in /shared-libs/python3.9/py/lib/python3.9/site-packages (from pandas>=0.25->statsmodels) (2022.5)\nRequirement already satisfied: python-dateutil>=2.7.3 in /shared-libs/python3.9/py-core/lib/python3.9/site-packages (from pandas>=0.25->statsmodels) (2.8.2)\nRequirement already satisfied: six in /shared-libs/python3.9/py-core/lib/python3.9/site-packages (from patsy>=0.5.2->statsmodels) (1.16.0)\nInstalling collected packages: patsy, statsmodels\nSuccessfully installed patsy-0.5.3 statsmodels-0.13.5\n\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. 
It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n\u001b[0m\u001b[33mWARNING: You are using pip version 22.0.4; however, version 22.3.1 is available.\nYou should consider upgrading via the '/usr/local/bin/python -m pip install --upgrade pip' command.\u001b[0m\u001b[33m\n\u001b[0mNote: you may need to restart the kernel to use updated packages.\nRequirement already satisfied: mlxtend in /usr/local/lib/python3.9/site-packages (0.21.0)\nRequirement already satisfied: matplotlib>=3.0.0 in /shared-libs/python3.9/py/lib/python3.9/site-packages (from mlxtend) (3.6.0)\nRequirement already satisfied: setuptools in /root/venv/lib/python3.9/site-packages (from mlxtend) (58.1.0)\nRequirement already satisfied: pandas>=0.24.2 in /shared-libs/python3.9/py/lib/python3.9/site-packages (from mlxtend) (1.2.5)\nRequirement already satisfied: numpy>=1.16.2 in /shared-libs/python3.9/py/lib/python3.9/site-packages (from mlxtend) (1.23.4)\nRequirement already satisfied: joblib>=0.13.2 in /shared-libs/python3.9/py/lib/python3.9/site-packages (from mlxtend) (1.2.0)\nRequirement already satisfied: scipy>=1.2.1 in /shared-libs/python3.9/py/lib/python3.9/site-packages (from mlxtend) (1.9.3)\nRequirement already satisfied: scikit-learn>=1.0.2 in /shared-libs/python3.9/py/lib/python3.9/site-packages (from mlxtend) (1.1.2)\nRequirement already satisfied: contourpy>=1.0.1 in /shared-libs/python3.9/py/lib/python3.9/site-packages (from matplotlib>=3.0.0->mlxtend) (1.0.5)\nRequirement already satisfied: pyparsing>=2.2.1 in /shared-libs/python3.9/py-core/lib/python3.9/site-packages (from matplotlib>=3.0.0->mlxtend) (3.0.9)\nRequirement already satisfied: fonttools>=4.22.0 in /shared-libs/python3.9/py/lib/python3.9/site-packages (from matplotlib>=3.0.0->mlxtend) (4.37.4)\nRequirement already satisfied: cycler>=0.10 in /shared-libs/python3.9/py/lib/python3.9/site-packages (from matplotlib>=3.0.0->mlxtend) (0.11.0)\nRequirement already 
satisfied: packaging>=20.0 in /shared-libs/python3.9/py-core/lib/python3.9/site-packages (from matplotlib>=3.0.0->mlxtend) (21.3)\nRequirement already satisfied: pillow>=6.2.0 in /shared-libs/python3.9/py/lib/python3.9/site-packages (from matplotlib>=3.0.0->mlxtend) (9.2.0)\nRequirement already satisfied: python-dateutil>=2.7 in /shared-libs/python3.9/py-core/lib/python3.9/site-packages (from matplotlib>=3.0.0->mlxtend) (2.8.2)\nRequirement already satisfied: kiwisolver>=1.0.1 in /shared-libs/python3.9/py/lib/python3.9/site-packages (from matplotlib>=3.0.0->mlxtend) (1.4.4)\nRequirement already satisfied: pytz>=2017.3 in /shared-libs/python3.9/py/lib/python3.9/site-packages (from pandas>=0.24.2->mlxtend) (2022.5)\nRequirement already satisfied: threadpoolctl>=2.0.0 in /shared-libs/python3.9/py/lib/python3.9/site-packages (from scikit-learn>=1.0.2->mlxtend) (3.1.0)\nRequirement already satisfied: six>=1.5 in /shared-libs/python3.9/py-core/lib/python3.9/site-packages (from python-dateutil>=2.7->matplotlib>=3.0.0->mlxtend) (1.16.0)\n\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. 
It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n\u001b[0m\u001b[33mWARNING: You are using pip version 22.0.4; however, version 22.3.1 is available.\nYou should consider upgrading via the '/usr/local/bin/python -m pip install --upgrade pip' command.\u001b[0m\u001b[33m\n\u001b[0mNote: you may need to restart the kernel to use updated packages.\n","output_type":"stream"}],"execution_count":1},{"cell_type":"code","source":"import pandas as pd\ninsurance_df = pd.read_csv(\"insurance.csv\")\ninsurance_df.head()","metadata":{"tags":[],"cell_id":"8c2e85e92fd9459ba340be5e0f7e8f8b","allow_embed":"code_output","source_hash":"b7688c1a","execution_start":1672948331483,"execution_millis":2,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","execution_count":2,"data":{"application/vnd.deepnote.dataframe.v3+json":{"column_count":7,"row_count":5,"columns":[{"name":"age","dtype":"int64","stats":{"unique_count":5,"nan_count":0,"min":"18","max":"33","histogram":[{"bin_start":18,"bin_end":19.5,"count":2},{"bin_start":19.5,"bin_end":21,"count":0},{"bin_start":21,"bin_end":22.5,"count":0},{"bin_start":22.5,"bin_end":24,"count":0},{"bin_start":24,"bin_end":25.5,"count":0},{"bin_start":25.5,"bin_end":27,"count":0},{"bin_start":27,"bin_end":28.5,"count":1},{"bin_start":28.5,"bin_end":30,"count":0},{"bin_start":30,"bin_end":31.5,"count":0},{"bin_start":31.5,"bin_end":33,"count":2}]}},{"name":"sex","dtype":"object","stats":{"unique_count":2,"nan_count":0,"categories":[{"name":"male","count":4},{"name":"female","count":1}]}},{"name":"bmi","dtype":"float64","stats":{"unique_count":5,"nan_count":0,"min":"22.705","max":"33.77","histogram":[{"bin_start":22.705,"bin_end":23.8115,"count":1},{"bin_start":23.8115,"bin_end":24.918,"count":0},{"bin_start":24.918,"bin_end":26.0245,"count":0},{"bin_start":26.0245,"bin_end":27.131,"count":0},{"bin_start":27.131,"bin_end":28.2375,"count":1},{"
bin_start":28.2375,"bin_end":29.344,"count":1},{"bin_start":29.344,"bin_end":30.4505,"count":0},{"bin_start":30.4505,"bin_end":31.557000000000002,"count":0},{"bin_start":31.557000000000002,"bin_end":32.6635,"count":0},{"bin_start":32.6635,"bin_end":33.77,"count":2}]}},{"name":"children","dtype":"int64","stats":{"unique_count":3,"nan_count":0,"min":"0","max":"3","histogram":[{"bin_start":0,"bin_end":0.3,"count":3},{"bin_start":0.3,"bin_end":0.6,"count":0},{"bin_start":0.6,"bin_end":0.8999999999999999,"count":0},{"bin_start":0.8999999999999999,"bin_end":1.2,"count":1},{"bin_start":1.2,"bin_end":1.5,"count":0},{"bin_start":1.5,"bin_end":1.7999999999999998,"count":0},{"bin_start":1.7999999999999998,"bin_end":2.1,"count":0},{"bin_start":2.1,"bin_end":2.4,"count":0},{"bin_start":2.4,"bin_end":2.6999999999999997,"count":0},{"bin_start":2.6999999999999997,"bin_end":3,"count":1}]}},{"name":"smoker","dtype":"object","stats":{"unique_count":2,"nan_count":0,"categories":[{"name":"no","count":4},{"name":"yes","count":1}]}},{"name":"region","dtype":"object","stats":{"unique_count":3,"nan_count":0,"categories":[{"name":"southeast","count":2},{"name":"northwest","count":2},{"name":"southwest","count":1}]}},{"name":"charges","dtype":"float64","stats":{"unique_count":5,"nan_count":0,"min":"1725.5523","max":"21984.47061","histogram":[{"bin_start":1725.5523,"bin_end":3751.444131,"count":1},{"bin_start":3751.444131,"bin_end":5777.335962,"count":2},{"bin_start":5777.335962,"bin_end":7803.227793000001,"count":0},{"bin_start":7803.227793000001,"bin_end":9829.119624,"count":0},{"bin_start":9829.119624,"bin_end":11855.011455,"count":0},{"bin_start":11855.011455,"bin_end":13880.903286,"count":0},{"bin_start":13880.903286,"bin_end":15906.795117,"count":0},{"bin_start":15906.795117,"bin_end":17932.686948000002,"count":1},{"bin_start":17932.686948000002,"bin_end":19958.578779,"count":0},{"bin_start":19958.578779,"bin_end":21984.47061,"count":1}]}},{"name":"_deepnote_index_column","dtype":"int64"
}],"rows":[{"age":"19","sex":"female","bmi":"27.9","children":"0","smoker":"yes","region":"southwest","charges":"16884.924","_deepnote_index_column":"0"},{"age":"18","sex":"male","bmi":"33.77","children":"1","smoker":"no","region":"southeast","charges":"1725.5523","_deepnote_index_column":"1"},{"age":"28","sex":"male","bmi":"33.0","children":"3","smoker":"no","region":"southeast","charges":"4449.462","_deepnote_index_column":"2"},{"age":"33","sex":"male","bmi":"22.705","children":"0","smoker":"no","region":"northwest","charges":"21984.47061","_deepnote_index_column":"3"},{"age":"32","sex":"male","bmi":"28.88","children":"0","smoker":"no","region":"northwest","charges":"3866.8552","_deepnote_index_column":"4"}]},"text/plain":"   age     sex     bmi  children smoker     region      charges\n0   19  female  27.900         0    yes  southwest  16884.92400\n1   18    male  33.770         1     no  southeast   1725.55230\n2   28    male  33.000         3     no  southeast   4449.46200\n3   33    male  22.705         0     no  northwest  21984.47061\n4   32    male  28.880         0     no  northwest   3866.85520","text/html":"<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>age</th>\n      <th>sex</th>\n      <th>bmi</th>\n      <th>children</th>\n      <th>smoker</th>\n      <th>region</th>\n      <th>charges</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>19</td>\n      <td>female</td>\n      <td>27.900</td>\n      <td>0</td>\n      <td>yes</td>\n      <td>southwest</td>\n      <td>16884.92400</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>18</td>\n      <td>male</td>\n      <td>33.770</td>\n      <td>1</td>\n      
<td>no</td>\n      <td>southeast</td>\n      <td>1725.55230</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>28</td>\n      <td>male</td>\n      <td>33.000</td>\n      <td>3</td>\n      <td>no</td>\n      <td>southeast</td>\n      <td>4449.46200</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>33</td>\n      <td>male</td>\n      <td>22.705</td>\n      <td>0</td>\n      <td>no</td>\n      <td>northwest</td>\n      <td>21984.47061</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>32</td>\n      <td>male</td>\n      <td>28.880</td>\n      <td>0</td>\n      <td>no</td>\n      <td>northwest</td>\n      <td>3866.85520</td>\n    </tr>\n  </tbody>\n</table>\n</div>"},"metadata":{}}],"execution_count":2},{"cell_type":"code","source":"from sklearn.model_selection import train_test_split\nX = insurance_df[['bmi', 'age', 'children']]\ny = insurance_df['charges']\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)","metadata":{"tags":[],"cell_id":"e347f4bec2f94310b26240d19f59fdf2","allow_embed":"code_output","source_hash":"760c53b7","execution_start":1672948331486,"execution_millis":1510,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[],"execution_count":3},{"cell_type":"code","source":"import statsmodels.api as sm\nX_train = sm.add_constant(X_train)\nlinear_reg_model = sm.OLS(y_train, X_train)\nlinear_reg_model = linear_reg_model.fit()\nprint(linear_reg_model.summary())","metadata":{"tags":[],"cell_id":"fa1737e8e4954602a106fa885e828d76","allow_embed":"code_output","source_hash":"537cc56d","execution_start":1672948332999,"execution_millis":363,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[{"name":"stdout","text":"                            OLS Regression Results                            \n==============================================================================\nDep. 
Variable:                charges   R-squared:                       0.110\nModel:                            OLS   Adj. R-squared:                  0.107\nMethod:                 Least Squares   F-statistic:                     43.86\nDate:                Thu, 05 Jan 2023   Prob (F-statistic):           9.94e-27\nTime:                        19:52:13   Log-Likelihood:                -11507.\nNo. Observations:                1070   AIC:                         2.302e+04\nDf Residuals:                    1066   BIC:                         2.304e+04\nDf Model:                           3                                         \nCovariance Type:            nonrobust                                         \n==============================================================================\n                 coef    std err          t      P>|t|      [0.025      0.975]\n------------------------------------------------------------------------------\nconst      -6118.0462   1968.214     -3.108      0.002   -9980.059   -2256.033\nbmi          332.2025     57.882      5.739      0.000     218.626     445.779\nage          220.7578     24.901      8.865      0.000     171.898     269.618\nchildren     563.0194    286.186      1.967      0.049       1.467    1124.572\n==============================================================================\nOmnibus:                      263.642   Durbin-Watson:                   1.938\nProb(Omnibus):                  0.000   Jarque-Bera (JB):              489.281\nSkew:                           1.524   Prob(JB):                    5.68e-107\nKurtosis:                       4.297   Cond. No.                         
291.\n==============================================================================\n\nNotes:\n[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n","output_type":"stream"}],"execution_count":4},{"cell_type":"code","source":"import numpy as np\nchurn_df = pd.read_csv(\"telco_churn.csv\")\nchurn_df.head()\nchurn_df['Churn'] = np.where(churn_df['Churn']=='Yes', 1, 0)","metadata":{"tags":[],"cell_id":"b7a3ac2681b94e3eabab0fcc7a65ad15","allow_embed":"code_output","source_hash":"358cc104","execution_start":1672948333363,"execution_millis":61,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[],"execution_count":5},{"cell_type":"code","source":"X = churn_df[['tenure', 'MonthlyCharges']]\nX.loc[:,'tenure_squared'] = [x**2 for x in list(churn_df['tenure'])]\ny = churn_df['Churn']\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)","metadata":{"tags":[],"cell_id":"fe161ab5c391412c900f503fbe9d9d8a","allow_embed":"code_output","source_hash":"11414d95","execution_start":1672948333431,"execution_millis":11,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[{"name":"stderr","text":"/shared-libs/python3.9/py/lib/python3.9/site-packages/pandas/core/indexing.py:1597: SettingWithCopyWarning: \nA value is trying to be set on a copy of a slice from a DataFrame.\nTry using .loc[row_indexer,col_indexer] = value instead\n\nSee the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n  self.obj[key] = value\n/shared-libs/python3.9/py/lib/python3.9/site-packages/pandas/core/indexing.py:1676: SettingWithCopyWarning: \nA value is trying to be set on a copy of a slice from a DataFrame.\nTry using .loc[row_indexer,col_indexer] = value instead\n\nSee the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n  
self._setitem_single_column(ilocs[0], value, pi)\n","output_type":"stream"}],"execution_count":6},{"cell_type":"code","source":"X_train = sm.add_constant(X_train)\nlog_reg_model = sm.Logit(y_train, X_train)\nlog_reg_model = log_reg_model.fit()\nprint(log_reg_model.summary())","metadata":{"tags":[],"cell_id":"ee152531fd5a40d68d1aef5eac9967f1","allow_embed":"code_output","source_hash":"4886aeca","execution_start":1672948333498,"execution_millis":3,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[{"name":"stdout","text":"Optimization terminated successfully.\n         Current function value: 0.458122\n         Iterations 7\n                           Logit Regression Results                           \n==============================================================================\nDep. Variable:                  Churn   No. Observations:                 5634\nModel:                          Logit   Df Residuals:                     5630\nMethod:                           MLE   Df Model:                            3\nDate:                Thu, 05 Jan 2023   Pseudo R-squ.:                  0.2084\nTime:                        19:52:13   Log-Likelihood:                -2581.1\nconverged:                       True   LL-Null:                       -3260.7\nCovariance Type:            nonrobust   LLR p-value:                1.923e-294\n==================================================================================\n                     coef    std err          z      P>|z|      [0.025      0.975]\n----------------------------------------------------------------------------------\nconst             -1.6265      0.102    -16.021      0.000      -1.826      -1.428\ntenure            -0.0753      0.006    -12.991      0.000      -0.087      -0.064\nMonthlyCharges     0.0325      0.001     22.318      0.000       0.030       0.035\ntenure_squared     0.0003   8.29e-05      4.026      0.000       0.000       
0.000\n==================================================================================\n","output_type":"stream"}],"execution_count":7},{"cell_type":"code","source":"from sklearn.ensemble import RandomForestRegressor\nX = insurance_df[['bmi', 'age', 'children']]\ny = insurance_df['charges']\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n\nrf_model1 = RandomForestRegressor(random_state=42)\nrf_model1.fit(X_train, y_train)\ny_pred1 = rf_model1.predict(X_test)\n\nrf_model2 = RandomForestRegressor(n_estimators= 50, max_depth=50, random_state=42)\nrf_model2.fit(X_train, y_train)\ny_pred2 = rf_model2.predict(X_test)\n","metadata":{"tags":[],"cell_id":"828696db519d4885b14642d1daa0208e","allow_embed":"code_output","source_hash":"eb4138e6","execution_start":1672948333499,"execution_millis":2342,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[],"execution_count":8},{"cell_type":"code","source":"from mlxtend.evaluate import paired_ttest_5x2cv\n_, p_value = paired_ttest_5x2cv(estimator1=rf_model1, estimator2=rf_model2, scoring='neg_mean_squared_error', X=X_train, y=y_train, random_seed=42)\nif p_value < 0.05:\n    print(f\"P-value of {p_value} give evidence that model difference is significant\")\nelse:\n    print(f\"P-value of {p_value} give evidence that model difference is not significant\")","metadata":{"tags":[],"cell_id":"cd9091365e55489497a6a6d44b4a3a2a","allow_embed":"code_output","source_hash":"96ccb6c4","execution_start":1672948396105,"execution_millis":2092,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[{"name":"stdout","text":"P-value of 0.33466026017280154 give evidence that model difference is not significant\n","output_type":"stream"}],"execution_count":11},{"cell_type":"code","source":"from sklearn.linear_model import LinearRegression\nlr_model1 = LinearRegression()\nlr_model1.fit(X_train, y_train)\n_, p_value = paired_ttest_5x2cv(estimator1=rf_model1, 
estimator2=lr_model1, scoring='neg_mean_squared_error', X=X_train, y=y_train, random_seed=42)\nif p_value < 0.05:\n    print(f\"P-value of {p_value} give evidence that model difference is significant\")\nelse:\n    print(f\"P-value of {p_value} give evidence that model difference is not significant\")","metadata":{"tags":[],"cell_id":"165426efba9c44979b208430a9c650ba","allow_embed":"code_output","source_hash":"a7695e6b","execution_start":1672948632113,"execution_millis":1398,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[{"name":"stdout","text":"P-value of 0.002834207433757008 give evidence that model difference is significant\n","output_type":"stream"}],"execution_count":19},{"cell_type":"markdown","source":"<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=434b0236-878d-4de0-9743-ad548853111f' target=\"_blank\">\n<img alt='Created in deepnote.com' style='display:inline;max-height:16px;margin:0px;margin-right:7.5px;' 
src='data:image/svg+xml;base64,PD94bWwgdmVyc2lvbj0iMS4wIiBlbmNvZGluZz0iVVRGLTgiPz4KPHN2ZyB3aWR0aD0iODBweCIgaGVpZ2h0PSI4MHB4IiB2aWV3Qm94PSIwIDAgODAgODAiIHZlcnNpb249IjEuMSIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIiB4bWxuczp4bGluaz0iaHR0cDovL3d3dy53My5vcmcvMTk5OS94bGluayI+CiAgICA8IS0tIEdlbmVyYXRvcjogU2tldGNoIDU0LjEgKDc2NDkwKSAtIGh0dHBzOi8vc2tldGNoYXBwLmNvbSAtLT4KICAgIDx0aXRsZT5Hcm91cCAzPC90aXRsZT4KICAgIDxkZXNjPkNyZWF0ZWQgd2l0aCBTa2V0Y2guPC9kZXNjPgogICAgPGcgaWQ9IkxhbmRpbmciIHN0cm9rZT0ibm9uZSIgc3Ryb2tlLXdpZHRoPSIxIiBmaWxsPSJub25lIiBmaWxsLXJ1bGU9ImV2ZW5vZGQiPgogICAgICAgIDxnIGlkPSJBcnRib2FyZCIgdHJhbnNmb3JtPSJ0cmFuc2xhdGUoLTEyMzUuMDAwMDAwLCAtNzkuMDAwMDAwKSI+CiAgICAgICAgICAgIDxnIGlkPSJHcm91cC0zIiB0cmFuc2Zvcm09InRyYW5zbGF0ZSgxMjM1LjAwMDAwMCwgNzkuMDAwMDAwKSI+CiAgICAgICAgICAgICAgICA8cG9seWdvbiBpZD0iUGF0aC0yMCIgZmlsbD0iIzAyNjVCNCIgcG9pbnRzPSIyLjM3NjIzNzYyIDgwIDM4LjA0NzY2NjcgODAgNTcuODIxNzgyMiA3My44MDU3NTkyIDU3LjgyMTc4MjIgMzIuNzU5MjczOSAzOS4xNDAyMjc4IDMxLjY4MzE2ODMiPjwvcG9seWdvbj4KICAgICAgICAgICAgICAgIDxwYXRoIGQ9Ik0zNS4wMDc3MTgsODAgQzQyLjkwNjIwMDcsNzYuNDU0OTM1OCA0Ny41NjQ5MTY3LDcxLjU0MjI2NzEgNDguOTgzODY2LDY1LjI2MTk5MzkgQzUxLjExMjI4OTksNTUuODQxNTg0MiA0MS42NzcxNzk1LDQ5LjIxMjIyODQgMjUuNjIzOTg0Niw0OS4yMTIyMjg0IEMyNS40ODQ5Mjg5LDQ5LjEyNjg0NDggMjkuODI2MTI5Niw0My4yODM4MjQ4IDM4LjY0NzU4NjksMzEuNjgzMTY4MyBMNzIuODcxMjg3MSwzMi41NTQ0MjUgTDY1LjI4MDk3Myw2Ny42NzYzNDIxIEw1MS4xMTIyODk5LDc3LjM3NjE0NCBMMzUuMDA3NzE4LDgwIFoiIGlkPSJQYXRoLTIyIiBmaWxsPSIjMDAyODY4Ij48L3BhdGg+CiAgICAgICAgICAgICAgICA8cGF0aCBkPSJNMCwzNy43MzA0NDA1IEwyNy4xMTQ1MzcsMC4yNTcxMTE0MzYgQzYyLjM3MTUxMjMsLTEuOTkwNzE3MDEgODAsMTAuNTAwMzkyNyA4MCwzNy43MzA0NDA1IEM4MCw2NC45NjA0ODgyIDY0Ljc3NjUwMzgsNzkuMDUwMzQxNCAzNC4zMjk1MTEzLDgwIEM0Ny4wNTUzNDg5LDc3LjU2NzA4MDggNTMuNDE4MjY3Nyw3MC4zMTM2MTAzIDUzLjQxODI2NzcsNTguMjM5NTg4NSBDNTMuNDE4MjY3Nyw0MC4xMjg1NTU3IDM2LjMwMzk1NDQsMzcuNzMwNDQwNSAyNS4yMjc0MTcsMzcuNzMwNDQwNSBDMTcuODQzMDU4NiwzNy43MzA0NDA1IDkuNDMzOTE5NjYsMzcuNzMwNDQwNSAwLDM3LjczMDQ0MDUgWiIgaWQ9IlBhdGgtMTkiIGZpbGw9IiMzNzkzRUYiPjwvcGF0aD4KICAgICAgI
CAgICAgPC9nPgogICAgICAgIDwvZz4KICAgIDwvZz4KPC9zdmc+' > </img>\nCreated in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>","metadata":{"tags":[],"created_in_deepnote_cell":true,"deepnote_cell_type":"markdown"}}],"nbformat":4,"nbformat_minor":0,"metadata":{"deepnote":{},"orig_nbformat":2,"deepnote_notebook_id":"fafecebbb0c84a0e9feb8da62f4ef07f","deepnote_persisted_session":{"createdAt":"2023-01-05T20:16:48.418Z"},"deepnote_execution_queue":[]}}


--------------------------------------------------------------------------------
/stable_diffusion/hello.py:
--------------------------------------------------------------------------------
1 | print("Hi")  # write the greeting "Hi" to stdout
2 | 


--------------------------------------------------------------------------------
/tabgan_experiements.ipynb:
--------------------------------------------------------------------------------
1 | {"cells":[{"cell_type":"code","source":"%pip install tabgan=='1.1.0'\n%pip install Faker\n%pip install catboost","metadata":{"tags":[],"cell_id":"abbcf329e47b4403aff29663a4198fb3","allow_embed":"code_output","source_hash":"9bb28577","execution_start":1667682202597,"execution_millis":4981,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[{"name":"stdout","text":"Requirement already satisfied: tabgan==1.1.0 in /usr/local/lib/python3.8/dist-packages (1.1.0)\nRequirement already satisfied: torch in /shared-libs/python3.8/py/lib/python3.8/site-packages (from tabgan==1.1.0) (1.12.1)\nRequirement already satisfied: numpy in /usr/local/lib/python3.8/dist-packages (from tabgan==1.1.0) (1.22.4)\nRequirement already satisfied: category-encoders in /usr/local/lib/python3.8/dist-packages (from tabgan==1.1.0) (2.5.1.post0)\nRequirement already satisfied: scikit-learn in /shared-libs/python3.8/py/lib/python3.8/site-packages (from tabgan==1.1.0) (1.1.2)\nRequirement already satisfied: lightgbm in /usr/local/lib/python3.8/dist-packages (from tabgan==1.1.0) (3.3.3)\nRequirement already satisfied: python-dateutil in /shared-libs/python3.8/py-core/lib/python3.8/site-packages (from tabgan==1.1.0) (2.8.2)\nRequirement already satisfied: torchvision in /shared-libs/python3.8/py/lib/python3.8/site-packages (from tabgan==1.1.0) (0.13.1)\nRequirement already satisfied: pandas in /shared-libs/python3.8/py/lib/python3.8/site-packages (from tabgan==1.1.0) (1.2.5)\nRequirement already satisfied: tqdm in /shared-libs/python3.8/py/lib/python3.8/site-packages (from tabgan==1.1.0) (4.64.1)\nRequirement already satisfied: typing-extensions in /usr/local/lib/python3.8/dist-packages (from torch->tabgan==1.1.0) (4.2.0)\nRequirement already satisfied: patsy>=0.5.1 in /usr/local/lib/python3.8/dist-packages (from category-encoders->tabgan==1.1.0) (0.5.3)\nRequirement already satisfied: scipy>=1.0.0 in /shared-libs/python3.8/py/lib/python3.8/site-packages (from 
category-encoders->tabgan==1.1.0) (1.9.3)\nRequirement already satisfied: statsmodels>=0.9.0 in /usr/local/lib/python3.8/dist-packages (from category-encoders->tabgan==1.1.0) (0.13.5)\nRequirement already satisfied: joblib>=1.0.0 in /shared-libs/python3.8/py/lib/python3.8/site-packages (from scikit-learn->tabgan==1.1.0) (1.2.0)\nRequirement already satisfied: threadpoolctl>=2.0.0 in /shared-libs/python3.8/py/lib/python3.8/site-packages (from scikit-learn->tabgan==1.1.0) (3.1.0)\nRequirement already satisfied: wheel in /usr/lib/python3/dist-packages (from lightgbm->tabgan==1.1.0) (0.34.2)\nRequirement already satisfied: six>=1.5 in /shared-libs/python3.8/py-core/lib/python3.8/site-packages (from python-dateutil->tabgan==1.1.0) (1.16.0)\nRequirement already satisfied: requests in /usr/lib/python3/dist-packages (from torchvision->tabgan==1.1.0) (2.22.0)\nRequirement already satisfied: pillow!=8.3.*,>=5.3.0 in /shared-libs/python3.8/py/lib/python3.8/site-packages (from torchvision->tabgan==1.1.0) (9.2.0)\nRequirement already satisfied: pytz>=2017.3 in /shared-libs/python3.8/py/lib/python3.8/site-packages (from pandas->tabgan==1.1.0) (2022.5)\nRequirement already satisfied: packaging>=21.3 in /shared-libs/python3.8/py-core/lib/python3.8/site-packages (from statsmodels>=0.9.0->category-encoders->tabgan==1.1.0) (21.3)\nRequirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /shared-libs/python3.8/py-core/lib/python3.8/site-packages (from packaging>=21.3->statsmodels>=0.9.0->category-encoders->tabgan==1.1.0) (3.0.9)\n\u001b[33mWARNING: You are using pip version 20.2.4; however, version 22.3.1 is available.\nYou should consider upgrading via the '/usr/local/bin/python -m pip install --upgrade pip' command.\u001b[0m\nNote: you may need to restart the kernel to use updated packages.\nRequirement already satisfied: Faker in /usr/local/lib/python3.8/dist-packages (15.2.0)\nRequirement already satisfied: python-dateutil>=2.4 in 
/shared-libs/python3.8/py-core/lib/python3.8/site-packages (from Faker) (2.8.2)\nRequirement already satisfied: six>=1.5 in /shared-libs/python3.8/py-core/lib/python3.8/site-packages (from python-dateutil>=2.4->Faker) (1.16.0)\n\u001b[33mWARNING: You are using pip version 20.2.4; however, version 22.3.1 is available.\nYou should consider upgrading via the '/usr/local/bin/python -m pip install --upgrade pip' command.\u001b[0m\nNote: you may need to restart the kernel to use updated packages.\nRequirement already satisfied: catboost in /usr/local/lib/python3.8/dist-packages (1.1.1)\nRequirement already satisfied: plotly in /shared-libs/python3.8/py/lib/python3.8/site-packages (from catboost) (5.10.0)\nRequirement already satisfied: six in /shared-libs/python3.8/py-core/lib/python3.8/site-packages (from catboost) (1.16.0)\nRequirement already satisfied: numpy>=1.16.0 in /usr/local/lib/python3.8/dist-packages (from catboost) (1.22.4)\nRequirement already satisfied: graphviz in /usr/local/lib/python3.8/dist-packages (from catboost) (0.20.1)\nRequirement already satisfied: matplotlib in /shared-libs/python3.8/py/lib/python3.8/site-packages (from catboost) (3.6.0)\nRequirement already satisfied: scipy in /shared-libs/python3.8/py/lib/python3.8/site-packages (from catboost) (1.9.3)\nRequirement already satisfied: pandas>=0.24.0 in /shared-libs/python3.8/py/lib/python3.8/site-packages (from catboost) (1.2.5)\nRequirement already satisfied: tenacity>=6.2.0 in /shared-libs/python3.8/py/lib/python3.8/site-packages (from plotly->catboost) (8.1.0)\nRequirement already satisfied: fonttools>=4.22.0 in /shared-libs/python3.8/py/lib/python3.8/site-packages (from matplotlib->catboost) (4.37.4)\nRequirement already satisfied: python-dateutil>=2.7 in /shared-libs/python3.8/py-core/lib/python3.8/site-packages (from matplotlib->catboost) (2.8.2)\nRequirement already satisfied: cycler>=0.10 in /shared-libs/python3.8/py/lib/python3.8/site-packages (from matplotlib->catboost) 
(0.11.0)\nRequirement already satisfied: pillow>=6.2.0 in /shared-libs/python3.8/py/lib/python3.8/site-packages (from matplotlib->catboost) (9.2.0)\nRequirement already satisfied: contourpy>=1.0.1 in /shared-libs/python3.8/py/lib/python3.8/site-packages (from matplotlib->catboost) (1.0.5)\nRequirement already satisfied: packaging>=20.0 in /shared-libs/python3.8/py-core/lib/python3.8/site-packages (from matplotlib->catboost) (21.3)\nRequirement already satisfied: pyparsing>=2.2.1 in /shared-libs/python3.8/py-core/lib/python3.8/site-packages (from matplotlib->catboost) (3.0.9)\nRequirement already satisfied: kiwisolver>=1.0.1 in /shared-libs/python3.8/py/lib/python3.8/site-packages (from matplotlib->catboost) (1.4.4)\nRequirement already satisfied: pytz>=2017.3 in /shared-libs/python3.8/py/lib/python3.8/site-packages (from pandas>=0.24.0->catboost) (2022.5)\n\u001b[33mWARNING: You are using pip version 20.2.4; however, version 22.3.1 is available.\nYou should consider upgrading via the '/usr/local/bin/python -m pip install --upgrade pip' command.\u001b[0m\nNote: you may need to restart the kernel to use updated packages.\n","output_type":"stream"}],"execution_count":1},{"cell_type":"code","source":"from faker import Faker\nimport random\n\nrandom.seed(10)\nDATA_SIZE = 5000","metadata":{"tags":[],"cell_id":"95fd44a9696349ae9bceaa1724f3b6a4","source_hash":"4a38f707","execution_start":1667682207582,"execution_millis":24,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[],"execution_count":2},{"cell_type":"code","source":"fake = Faker()\nFaker.seed(42)\nnames = []\nfor i in range(0,DATA_SIZE):\n    names.append(fake.name())","metadata":{"tags":[],"cell_id":"786d6991d489447888ee0bf50a2fd712","source_hash":"c24ea4d5","execution_start":1667682207610,"execution_millis":368,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[],"execution_count":3},{"cell_type":"code","source":"Faker.seed(42)\nstate = []\nfor i in 
range(0,DATA_SIZE):\n    state.append(fake.state())","metadata":{"tags":[],"cell_id":"0d3b5c4cfd9b4efaa7d5daf326019219","source_hash":"a1e932b7","execution_start":1667682207990,"execution_millis":6,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[],"execution_count":4},{"cell_type":"code","source":"specialty = []\nfor i in range(0, DATA_SIZE):\n    specialty.append(\"Emergency Medicine\")","metadata":{"tags":[],"cell_id":"cb826a0b4eab43fc821222aff5ea778e","source_hash":"f51fa424","execution_start":1667682207996,"execution_millis":1,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[],"execution_count":5},{"cell_type":"code","source":"from faker.providers import DynamicProvider\nhealth_insurance_provider = DynamicProvider(\n     provider_name=\"health_insurance\",\n     elements=[\"UnitedHealth Group\", \"Anthem\", \"Aetna\", \"Cigna\", \"Humana\", \"Medicare\"],\n)\nFaker.seed(42)\n\nfake.add_provider(health_insurance_provider)\n\ninsurance = []\nfor i in range(0, DATA_SIZE):\n    insurance.append(fake.health_insurance())","metadata":{"tags":[],"cell_id":"8bd583f5bbb4442d8416313f4787dd8f","allow_embed":"code_output","source_hash":"b1d2da56","execution_start":1667682208001,"execution_millis":12,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[],"execution_count":6},{"cell_type":"code","source":"Faker.seed(42)\nsex = []\nfor i in range(0, DATA_SIZE):\n    sex.append(fake.profile()['sex'])","metadata":{"tags":[],"cell_id":"8e352b7ff91847aeaa0992fc9572d2a6","source_hash":"5e51c6a6","execution_start":1667682208021,"execution_millis":3819,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[],"execution_count":7},{"cell_type":"code","source":"import numpy as np\n\nnp.random.seed(42)\nmu, sigma = 50, 20\nage = np.random.normal(mu, sigma, DATA_SIZE)\nage_int = 
np.round(age)","metadata":{"tags":[],"cell_id":"f2b6cae348a54e33b89b89545fafe2ed","source_hash":"fcce59d1","execution_start":1667682211845,"execution_millis":0,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[],"execution_count":8},{"cell_type":"code","source":"np.random.seed(42)\nmu, sigma = 180, 70\nweight = np.round(np.random.normal(mu, sigma, DATA_SIZE), 2)","metadata":{"tags":[],"cell_id":"9da07d6ec1a348e4878d7698f4404a7f","source_hash":"877d4a20","execution_start":1667682211846,"execution_millis":7,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[],"execution_count":9},{"cell_type":"code","source":"np.random.seed(42)\nmu, sigma = 68, 8\nheight = np.round(np.random.normal(mu, sigma, DATA_SIZE), 2)","metadata":{"tags":[],"cell_id":"5c400f407a1943908a5406304aef08f4","allow_embed":"code_output","source_hash":"4a85cc4","execution_start":1667682211853,"execution_millis":0,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[],"execution_count":10},{"cell_type":"code","source":"smoker = []\nfor i in range(0, DATA_SIZE):\n    smoker.append(np.random.randint(0,2))\n\nsmoker_category = ['Yes' if x == 1 else 'No' for x in smoker]","metadata":{"tags":[],"cell_id":"68a3fef3032f4767883dc0656c657a64","allow_embed":"code_output","source_hash":"27c1d06b","execution_start":1667682211854,"execution_millis":10,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[],"execution_count":11},{"cell_type":"code","source":"np.random.seed(42)\nlegnth_of_stay = []\nfor i in range(0, DATA_SIZE):\n    legnth_of_stay.append(np.random.randint(1,10))","metadata":{"tags":[],"cell_id":"11d1a0f34ce64c969d6926ec97c7a546","allow_embed":"code_output","source_hash":"dd56c00e","execution_start":1667682211891,"execution_millis":0,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[],"execution_count":12},{"cell_type":"code","source":"import pandas as pd \ndf = pd.DataFrame({'height':height, 
'weight':weight, \n                'age':age_int, 'sex':sex, 'insurance':insurance, \n                'specialty':specialty, 'name':names, 'state':state,\n                'length_of_stay':legnth_of_stay, 'smoker':smoker_category,\n                })","metadata":{"tags":[],"cell_id":"5ba86450352743a891328feffa0a56d8","allow_embed":"code_output","source_hash":"3cbbc126","execution_start":1667682211892,"execution_millis":0,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[],"execution_count":13},{"cell_type":"code","source":"df['bmi'] = np.round((df['weight']/(df['height']**2))*703, 3)","metadata":{"tags":[],"cell_id":"60576d39941340758d8bafcefec4d7d6","allow_embed":"code_output","source_hash":"5f6a0d97","execution_start":1667682211892,"execution_millis":0,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[],"execution_count":14},{"cell_type":"code","source":"df['readmission'] = [np.random.randint(0,2) for x in range(0, DATA_SIZE)]\ndf.drop_duplicates('name', inplace=True)\nnp.random.seed(42)\n\ndf_sample1 = df.sample(frac=0.2, replace=True, random_state=1)\n\ndf_sample2 = df[~df['name'].isin(list(set(df_sample1['name'])))]\n\ndf_sample2['readmission'] = 0 \ndf_sample2.loc[(df_sample2.bmi >=30) & (df_sample2.smoker == 'Yes') & (df_sample2.insurance == 'Medicare') & (df.length_of_stay >= 5), 'readmission'] = 1\n\ndf = df_sample2.append(df_sample1)","metadata":{"tags":[],"cell_id":"bf3897af187043c3a9f93ce14b46f308","allow_embed":"code","source_hash":"fe050939","execution_start":1667682211902,"execution_millis":13,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[{"name":"stderr","text":"/tmp/ipykernel_482/3297978708.py:9: SettingWithCopyWarning: \nA value is trying to be set on a copy of a slice from a DataFrame.\nTry using .loc[row_indexer,col_indexer] = value instead\n\nSee the caveats in the documentation: 
https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n  df_sample2['readmission'] = 0\n/shared-libs/python3.8/py/lib/python3.8/site-packages/pandas/core/indexing.py:1720: SettingWithCopyWarning: \nA value is trying to be set on a copy of a slice from a DataFrame.\nTry using .loc[row_indexer,col_indexer] = value instead\n\nSee the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n  self._setitem_single_column(loc, value, pi)\n","output_type":"stream"}],"execution_count":null},{"cell_type":"code","source":"from collections import Counter\nCounter(df['readmission'])","metadata":{"tags":[],"cell_id":"e5c9ce8b73f5427a8473e8b2d1141bdb","allow_embed":"code_output","source_hash":"7a636544","execution_start":1667682211936,"execution_millis":3,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","execution_count":16,"data":{"text/plain":"Counter({0: 4405, 1: 517})"},"metadata":{}}],"execution_count":null},{"cell_type":"code","source":"df['insurance'] = df['insurance'].astype('category')\ndf['insurance'] = df['insurance'].cat.codes\n\ndf['sex'] = df['sex'].astype('category')\ndf['sex'] = df['sex'].cat.codes\n\ndf['smoker'] = df['smoker'].astype('category')\ndf['smoker'] = df['smoker'].cat.codes\n\ndf['state'] = df['state'].astype('category')\ndf['state'] = df['state'].cat.codes","metadata":{"tags":[],"cell_id":"07c4712e868f4ccabd8b366cbb6cecc0","allow_embed":"code_output","source_hash":"dba22df1","execution_start":1667682211937,"execution_millis":0,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[],"execution_count":null},{"cell_type":"code","source":"X = df[['insurance', 'sex', 'smoker', 'state', 'height', 'weight', 'bmi', 'length_of_stay']]\ny = 
df['readmission']","metadata":{"tags":[],"cell_id":"e35f3707ec4f4fc48f22de4ba2ad6d12","allow_embed":"code_output","source_hash":"6e0c54a5","execution_start":1667682211937,"execution_millis":0,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[],"execution_count":null},{"cell_type":"code","source":"from sklearn.model_selection import train_test_split\n\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)","metadata":{"tags":[],"cell_id":"ca65ca43602d40acae72294846e67551","allow_embed":"code_output","source_hash":"4ba717ca","execution_start":1667682211943,"execution_millis":294,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[],"execution_count":null},{"cell_type":"code","source":"from catboost import CatBoostClassifier","metadata":{"tags":[],"cell_id":"4c0b089b1a7d4c4aab6af10b1ed1fe95","allow_embed":"code_output","source_hash":"390e05a9","execution_start":1667682212282,"execution_millis":1,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[],"execution_count":null},{"cell_type":"code","source":"cats = ['insurance', 'sex', 'smoker', 'state']\nmodel1 = CatBoostClassifier(cat_features= cats, iterations=10)\nmodel1.fit(X_train, y_train)","metadata":{"tags":[],"cell_id":"dc79a692aa364e36956d37ea1c4e9d9d","allow_embed":"code_output","source_hash":"33c229c7","execution_start":1667682212283,"execution_millis":55,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[{"name":"stdout","text":"Learning rate set to 0.5\n0:\tlearn: 0.4660763\ttotal: 48.2ms\tremaining: 434ms\n1:\tlearn: 0.3911385\ttotal: 49.5ms\tremaining: 198ms\n2:\tlearn: 0.3574981\ttotal: 50.5ms\tremaining: 118ms\n3:\tlearn: 0.3447903\ttotal: 51.3ms\tremaining: 76.9ms\n4:\tlearn: 0.3374272\ttotal: 52.3ms\tremaining: 52.3ms\n5:\tlearn: 0.3343143\ttotal: 53.4ms\tremaining: 35.6ms\n6:\tlearn: 0.3336231\ttotal: 54.2ms\tremaining: 23.2ms\n7:\tlearn: 0.3302381\ttotal: 55.2ms\tremaining: 
13.8ms\n8:\tlearn: 0.3272703\ttotal: 56.2ms\tremaining: 6.24ms\n9:\tlearn: 0.3271818\ttotal: 57ms\tremaining: 0us\n","output_type":"stream"},{"output_type":"execute_result","execution_count":22,"data":{"text/plain":"<catboost.core.CatBoostClassifier at 0x7f3e5fc16970>"},"metadata":{}}],"execution_count":null},{"cell_type":"code","source":"y_pred = model1.predict(X_test)\nprint(Counter(y_pred))\nprint(Counter(y_test))\nfrom sklearn.metrics import precision_score\nprint(\"precision_score: \", precision_score(y_test, y_pred))","metadata":{"tags":[],"cell_id":"635e4532d41b4e4ebceb0abb79c92d95","allow_embed":"code_output","source_hash":"dc48d703","execution_start":1667682212339,"execution_millis":6,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[{"name":"stdout","text":"Counter({0: 1477})\nCounter({0: 1331, 1: 146})\nprecision_score:  0.0\n/shared-libs/python3.8/py/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1334: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. 
Use `zero_division` parameter to control this behavior.\n  _warn_prf(average, modifier, msg_start, len(result))\n","output_type":"stream"}],"execution_count":null},{"cell_type":"code","source":"from tabgan.sampler import GANGenerator\ncols = ['insurance', 'sex', 'smoker', 'state', 'height', 'weight', 'bmi', 'length_of_stay', 'readmission']\n\nX = df[cols[:-1]]\ny = pd.DataFrame(list(df['readmission']), columns=['readmission'])\nX.reset_index(inplace=True, drop=True)\ny.reset_index(inplace=True, drop=True)\n\n\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)\n\nX_train.reset_index(inplace=True, drop=True)\ny_train.reset_index(inplace=True, drop=True)\nX_test.reset_index(inplace=True, drop=True)\n\nnew_train2, new_target2 = GANGenerator(cat_cols = cols, epochs=2, is_post_process=False).generate_data_pipe(X_train, y_train, X_test,use_adversarial=False, only_generated_data=False)\n","metadata":{"tags":[],"cell_id":"ed84f947a62248599c44332d2da38d62","allow_embed":"code_output","source_hash":"86dfda","execution_start":1667682212348,"execution_millis":19978,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[{"name":"stderr","text":"/shared-libs/python3.8/py/lib/python3.8/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n  from .autonotebook import tqdm as notebook_tqdm\nFitting CTGAN transformers for each column: 100%|██████████| 9/9 [00:00<00:00, 1783.71it/s]\nTraining CTGAN, epochs:: 100%|██████████| 2/2 [00:15<00:00,  7.81s/it]\n","output_type":"stream"}],"execution_count":null},{"cell_type":"code","source":"print(Counter(new_target2))","metadata":{"tags":[],"cell_id":"b4652ccfb314403ab03c69b44f9f2df8","allow_embed":"code_output","source_hash":"96721c11","execution_start":1667682232332,"execution_millis":8,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[{"name":"stdout","text":"Counter({0: 8102, 1: 2921})\n","output_type":"stream"}],"execution_count":null},{"cell_type":"code","source":"new_train2.head()\n","metadata":{"tags":[],"cell_id":"656087f70db24a298ac3a577e9b32507","allow_embed":"code_output","source_hash":"d39d1029","execution_start":1667682232338,"execution_millis":22,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","execution_count":27,"data":{"application/vnd.deepnote.dataframe.v3+json":{"column_count":8,"row_count":5,"columns":[{"name":"insurance","dtype":"int8","stats":{"unique_count":4,"nan_count":0,"min":"1","max":"5","histogram":[{"bin_start":1,"bin_end":1.4,"count":1},{"bin_start":1.4,"bin_end":1.8,"count":0},{"bin_start":1.8,"bin_end":2.2,"count":2},{"bin_start":2.2,"bin_end":2.6,"count":0},{"bin_start":2.6,"bin_end":3,"count":0},{"bin_start":3,"bin_end":3.4000000000000004,"count":1},{"bin_start":3.4000000000000004,"bin_end":3.8000000000000003,"count":0},{"bin_start":3.8000000000000003,"bin_end":4.2,"count":0},{"bin_start":4.2,"bin_end":4.6,"count":0},{"bin_start":4.6,"bin_end":5,"count":1}]}},{"name":"sex","dtype":"int8","stats":{"unique_count":2,"nan_count":0,"min":"0","max":"1","histogram":[{"bin_start":0,"bin_end":0.1,"count":2},{"bin_start":0.1,"bin_end":0.2,"count":0},{"bin_start":0.2,"bin_end":0.30000000000
000004,"count":0},{"bin_start":0.30000000000000004,"bin_end":0.4,"count":0},{"bin_start":0.4,"bin_end":0.5,"count":0},{"bin_start":0.5,"bin_end":0.6000000000000001,"count":0},{"bin_start":0.6000000000000001,"bin_end":0.7000000000000001,"count":0},{"bin_start":0.7000000000000001,"bin_end":0.8,"count":0},{"bin_start":0.8,"bin_end":0.9,"count":0},{"bin_start":0.9,"bin_end":1,"count":3}]}},{"name":"smoker","dtype":"int8","stats":{"unique_count":2,"nan_count":0,"min":"0","max":"1","histogram":[{"bin_start":0,"bin_end":0.1,"count":2},{"bin_start":0.1,"bin_end":0.2,"count":0},{"bin_start":0.2,"bin_end":0.30000000000000004,"count":0},{"bin_start":0.30000000000000004,"bin_end":0.4,"count":0},{"bin_start":0.4,"bin_end":0.5,"count":0},{"bin_start":0.5,"bin_end":0.6000000000000001,"count":0},{"bin_start":0.6000000000000001,"bin_end":0.7000000000000001,"count":0},{"bin_start":0.7000000000000001,"bin_end":0.8,"count":0},{"bin_start":0.8,"bin_end":0.9,"count":0},{"bin_start":0.9,"bin_end":1,"count":3}]}},{"name":"state","dtype":"int8","stats":{"unique_count":5,"nan_count":0,"min":"0","max":"34","histogram":[{"bin_start":0,"bin_end":3.4,"count":1},{"bin_start":3.4,"bin_end":6.8,"count":0},{"bin_start":6.8,"bin_end":10.2,"count":1},{"bin_start":10.2,"bin_end":13.6,"count":1},{"bin_start":13.6,"bin_end":17,"count":0},{"bin_start":17,"bin_end":20.4,"count":0},{"bin_start":20.4,"bin_end":23.8,"count":1},{"bin_start":23.8,"bin_end":27.2,"count":0},{"bin_start":27.2,"bin_end":30.599999999999998,"count":0},{"bin_start":30.599999999999998,"bin_end":34,"count":1}]}},{"name":"height","dtype":"float64","stats":{"unique_count":5,"nan_count":0,"min":"54.62","max":"76.08","histogram":[{"bin_start":54.62,"bin_end":56.766,"count":1},{"bin_start":56.766,"bin_end":58.912,"count":0},{"bin_start":58.912,"bin_end":61.058,"count":0},{"bin_start":61.058,"bin_end":63.20399999999999,"count":0},{"bin_start":63.20399999999999,"bin_end":65.35,"count":1},{"bin_start":65.35,"bin_end":67.496,"count":0},{"bin_sta
rt":67.496,"bin_end":69.642,"count":2},{"bin_start":69.642,"bin_end":71.788,"count":0},{"bin_start":71.788,"bin_end":73.934,"count":0},{"bin_start":73.934,"bin_end":76.08,"count":1}]}},{"name":"weight","dtype":"float64","stats":{"unique_count":5,"nan_count":0,"min":"62.9","max":"250.66","histogram":[{"bin_start":62.9,"bin_end":81.676,"count":1},{"bin_start":81.676,"bin_end":100.452,"count":0},{"bin_start":100.452,"bin_end":119.22800000000001,"count":0},{"bin_start":119.22800000000001,"bin_end":138.004,"count":0},{"bin_start":138.004,"bin_end":156.78,"count":1},{"bin_start":156.78,"bin_end":175.556,"count":0},{"bin_start":175.556,"bin_end":194.332,"count":2},{"bin_start":194.332,"bin_end":213.108,"count":0},{"bin_start":213.108,"bin_end":231.88400000000001,"count":0},{"bin_start":231.88400000000001,"bin_end":250.66,"count":1}]}},{"name":"bmi","dtype":"float64","stats":{"unique_count":5,"nan_count":0,"min":"14.822","max":"30.444","histogram":[{"bin_start":14.822,"bin_end":16.3842,"count":1},{"bin_start":16.3842,"bin_end":17.9464,"count":0},{"bin_start":17.9464,"bin_end":19.5086,"count":0},{"bin_start":19.5086,"bin_end":21.0708,"count":0},{"bin_start":21.0708,"bin_end":22.633,"count":0},{"bin_start":22.633,"bin_end":24.1952,"count":0},{"bin_start":24.1952,"bin_end":25.757399999999997,"count":1},{"bin_start":25.757399999999997,"bin_end":27.3196,"count":1},{"bin_start":27.3196,"bin_end":28.8818,"count":1},{"bin_start":28.8818,"bin_end":30.444,"count":1}]}},{"name":"length_of_stay","dtype":"int64","stats":{"unique_count":4,"nan_count":0,"min":"1","max":"6","histogram":[{"bin_start":1,"bin_end":1.5,"count":1},{"bin_start":1.5,"bin_end":2,"count":0},{"bin_start":2,"bin_end":2.5,"count":0},{"bin_start":2.5,"bin_end":3,"count":0},{"bin_start":3,"bin_end":3.5,"count":0},{"bin_start":3.5,"bin_end":4,"count":0},{"bin_start":4,"bin_end":4.5,"count":1},{"bin_start":4.5,"bin_end":5,"count":0},{"bin_start":5,"bin_end":5.5,"count":1},{"bin_start":5.5,"bin_end":6,"count":2}]}},{"name"
:"_deepnote_index_column","dtype":"int64"}],"rows":[{"insurance":"2","sex":"0","smoker":"0","state":"13","height":"54.62","weight":"62.9","bmi":"14.822","length_of_stay":"5","_deepnote_index_column":"0"},{"insurance":"1","sex":"1","smoker":"1","state":"34","height":"76.08","weight":"250.66","bmi":"30.444","length_of_stay":"4","_deepnote_index_column":"1"},{"insurance":"5","sex":"0","smoker":"0","state":"0","height":"67.56","weight":"176.11","bmi":"27.124","length_of_stay":"6","_deepnote_index_column":"2"},{"insurance":"3","sex":"1","smoker":"1","state":"23","height":"63.21","weight":"138.11","bmi":"24.3","length_of_stay":"6","_deepnote_index_column":"3"},{"insurance":"2","sex":"1","smoker":"1","state":"8","height":"68.85","weight":"187.4","bmi":"27.792","length_of_stay":"1","_deepnote_index_column":"4"}]},"text/plain":"   insurance  sex  smoker  state  height  weight     bmi  length_of_stay\n0          2    0       0     13   54.62   62.90  14.822               5\n1          1    1       1     34   76.08  250.66  30.444               4\n2          5    0       0      0   67.56  176.11  27.124               6\n3          3    1       1     23   63.21  138.11  24.300               6\n4          2    1       1      8   68.85  187.40  27.792               1","text/html":"<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>insurance</th>\n      <th>sex</th>\n      <th>smoker</th>\n      <th>state</th>\n      <th>height</th>\n      <th>weight</th>\n      <th>bmi</th>\n      <th>length_of_stay</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>13</td>\n      <td>54.62</td>\n      
<td>62.90</td>\n      <td>14.822</td>\n      <td>5</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>1</td>\n      <td>1</td>\n      <td>1</td>\n      <td>34</td>\n      <td>76.08</td>\n      <td>250.66</td>\n      <td>30.444</td>\n      <td>4</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>5</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>67.56</td>\n      <td>176.11</td>\n      <td>27.124</td>\n      <td>6</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>3</td>\n      <td>1</td>\n      <td>1</td>\n      <td>23</td>\n      <td>63.21</td>\n      <td>138.11</td>\n      <td>24.300</td>\n      <td>6</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>2</td>\n      <td>1</td>\n      <td>1</td>\n      <td>8</td>\n      <td>68.85</td>\n      <td>187.40</td>\n      <td>27.792</td>\n      <td>1</td>\n    </tr>\n  </tbody>\n</table>\n</div>"},"metadata":{}}],"execution_count":null},{"cell_type":"code","source":"cats = ['insurance', 'sex', 'smoker', 'state']\nmodel2 = CatBoostClassifier(cat_features= cats, iterations=10)\nmodel2.fit(new_train2, new_target2)","metadata":{"tags":[],"cell_id":"2fd9973aa39548a09690fbe0f8b73da0","allow_embed":"code_output","source_hash":"dc3d76c6","execution_start":1667684407711,"execution_millis":33,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[{"name":"stdout","text":"Learning rate set to 0.5\n0:\tlearn: 0.6802753\ttotal: 1.61ms\tremaining: 14.5ms\n1:\tlearn: 0.6708204\ttotal: 3.13ms\tremaining: 12.5ms\n2:\tlearn: 0.6643163\ttotal: 4.39ms\tremaining: 10.3ms\n3:\tlearn: 0.6596202\ttotal: 5.67ms\tremaining: 8.51ms\n4:\tlearn: 0.6588420\ttotal: 6.93ms\tremaining: 6.93ms\n5:\tlearn: 0.6580290\ttotal: 8.15ms\tremaining: 5.43ms\n6:\tlearn: 0.6572360\ttotal: 9.41ms\tremaining: 4.03ms\n7:\tlearn: 0.6555131\ttotal: 10.6ms\tremaining: 2.64ms\n8:\tlearn: 0.6538899\ttotal: 11.8ms\tremaining: 1.31ms\n9:\tlearn: 0.6511306\ttotal: 13ms\tremaining: 
0us\n","output_type":"stream"},{"output_type":"execute_result","execution_count":33,"data":{"text/plain":"<catboost.core.CatBoostClassifier at 0x7f3eb99c4430>"},"metadata":{}}],"execution_count":null},{"cell_type":"code","source":"y_pred2 = model2.predict(X_test)\nprint(Counter(y_pred2))\nprint(Counter(y_test['readmission']))\nfrom sklearn.metrics import precision_score\nprint(\"precision_score: \", precision_score(y_test, y_pred2))","metadata":{"tags":[],"cell_id":"576566cfca4e4822a30fbf42f452c6c4","allow_embed":"code_output","source_hash":"b45bd8c7","execution_start":1667684412101,"execution_millis":5,"deepnote_to_be_reexecuted":false,"deepnote_cell_type":"code"},"outputs":[{"name":"stdout","text":"Counter({0: 1471, 1: 6})\nCounter({0: 1331, 1: 146})\nprecision_score:  0.16666666666666666\n","output_type":"stream"}],"execution_count":null},{"cell_type":"markdown","source":"<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=1adae434-40f7-494d-b18c-c2d2f104fb0b' target=\"_blank\">\n<img alt='Created in deepnote.com' style='display:inline;max-height:16px;margin:0px;margin-right:7.5px;' 
src='data:image/svg+xml;base64,PD94bWwgdmVyc2lvbj0iMS4wIiBlbmNvZGluZz0iVVRGLTgiPz4KPHN2ZyB3aWR0aD0iODBweCIgaGVpZ2h0PSI4MHB4IiB2aWV3Qm94PSIwIDAgODAgODAiIHZlcnNpb249IjEuMSIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIiB4bWxuczp4bGluaz0iaHR0cDovL3d3dy53My5vcmcvMTk5OS94bGluayI+CiAgICA8IS0tIEdlbmVyYXRvcjogU2tldGNoIDU0LjEgKDc2NDkwKSAtIGh0dHBzOi8vc2tldGNoYXBwLmNvbSAtLT4KICAgIDx0aXRsZT5Hcm91cCAzPC90aXRsZT4KICAgIDxkZXNjPkNyZWF0ZWQgd2l0aCBTa2V0Y2guPC9kZXNjPgogICAgPGcgaWQ9IkxhbmRpbmciIHN0cm9rZT0ibm9uZSIgc3Ryb2tlLXdpZHRoPSIxIiBmaWxsPSJub25lIiBmaWxsLXJ1bGU9ImV2ZW5vZGQiPgogICAgICAgIDxnIGlkPSJBcnRib2FyZCIgdHJhbnNmb3JtPSJ0cmFuc2xhdGUoLTEyMzUuMDAwMDAwLCAtNzkuMDAwMDAwKSI+CiAgICAgICAgICAgIDxnIGlkPSJHcm91cC0zIiB0cmFuc2Zvcm09InRyYW5zbGF0ZSgxMjM1LjAwMDAwMCwgNzkuMDAwMDAwKSI+CiAgICAgICAgICAgICAgICA8cG9seWdvbiBpZD0iUGF0aC0yMCIgZmlsbD0iIzAyNjVCNCIgcG9pbnRzPSIyLjM3NjIzNzYyIDgwIDM4LjA0NzY2NjcgODAgNTcuODIxNzgyMiA3My44MDU3NTkyIDU3LjgyMTc4MjIgMzIuNzU5MjczOSAzOS4xNDAyMjc4IDMxLjY4MzE2ODMiPjwvcG9seWdvbj4KICAgICAgICAgICAgICAgIDxwYXRoIGQ9Ik0zNS4wMDc3MTgsODAgQzQyLjkwNjIwMDcsNzYuNDU0OTM1OCA0Ny41NjQ5MTY3LDcxLjU0MjI2NzEgNDguOTgzODY2LDY1LjI2MTk5MzkgQzUxLjExMjI4OTksNTUuODQxNTg0MiA0MS42NzcxNzk1LDQ5LjIxMjIyODQgMjUuNjIzOTg0Niw0OS4yMTIyMjg0IEMyNS40ODQ5Mjg5LDQ5LjEyNjg0NDggMjkuODI2MTI5Niw0My4yODM4MjQ4IDM4LjY0NzU4NjksMzEuNjgzMTY4MyBMNzIuODcxMjg3MSwzMi41NTQ0MjUgTDY1LjI4MDk3Myw2Ny42NzYzNDIxIEw1MS4xMTIyODk5LDc3LjM3NjE0NCBMMzUuMDA3NzE4LDgwIFoiIGlkPSJQYXRoLTIyIiBmaWxsPSIjMDAyODY4Ij48L3BhdGg+CiAgICAgICAgICAgICAgICA8cGF0aCBkPSJNMCwzNy43MzA0NDA1IEwyNy4xMTQ1MzcsMC4yNTcxMTE0MzYgQzYyLjM3MTUxMjMsLTEuOTkwNzE3MDEgODAsMTAuNTAwMzkyNyA4MCwzNy43MzA0NDA1IEM4MCw2NC45NjA0ODgyIDY0Ljc3NjUwMzgsNzkuMDUwMzQxNCAzNC4zMjk1MTEzLDgwIEM0Ny4wNTUzNDg5LDc3LjU2NzA4MDggNTMuNDE4MjY3Nyw3MC4zMTM2MTAzIDUzLjQxODI2NzcsNTguMjM5NTg4NSBDNTMuNDE4MjY3Nyw0MC4xMjg1NTU3IDM2LjMwMzk1NDQsMzcuNzMwNDQwNSAyNS4yMjc0MTcsMzcuNzMwNDQwNSBDMTcuODQzMDU4NiwzNy43MzA0NDA1IDkuNDMzOTE5NjYsMzcuNzMwNDQwNSAwLDM3LjczMDQ0MDUgWiIgaWQ9IlBhdGgtMTkiIGZpbGw9IiMzNzkzRUYiPjwvcGF0aD4KICAgICAgI
CAgICAgPC9nPgogICAgICAgIDwvZz4KICAgIDwvZz4KPC9zdmc+' > </img>\nCreated in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>","metadata":{"tags":[],"created_in_deepnote_cell":true,"deepnote_cell_type":"markdown"}}],"nbformat":4,"nbformat_minor":0,"metadata":{"deepnote":{},"orig_nbformat":2,"deepnote_notebook_id":"b55c6a4c4fc1498c924a7875e7880421","deepnote_persisted_session":{"createdAt":"2022-11-05T22:02:21.023Z"},"deepnote_execution_queue":[]}}


--------------------------------------------------------------------------------