├── .gitignore ├── .pre-commit-config.yaml ├── CODE_OF_CONDUCT.md ├── MIT-LICENSE.TXT ├── Makefile ├── README.md ├── funpymodeling ├── __init__.py ├── data_prep.py ├── exploratory.py ├── model_validation.py └── test │ ├── __init__.py │ └── test_funpymodeling.py ├── notebooks └── quick-start_eng_v1.ipynb ├── poetry.lock └── pyproject.toml /.gitignore: -------------------------------------------------------------------------------- 1 | # Custom 2 | my_env/ 3 | 4 | # General 5 | syntax: glob 6 | .python-version 7 | .venv 8 | env/* 9 | venv/* 10 | ENV/* 11 | .idea/* 12 | .DS_Store 13 | dython.egg*/* 14 | *run_stuff.py* 15 | build/* 16 | dist/* 17 | build_deploy.sh 18 | site/* 19 | debug.py 20 | AUX/ 21 | __pycache__/ 22 | *.py[cod] 23 | *$py.class 24 | *.pyc 25 | *.ipynb_checkpoints/ 26 | funPyModeling.egg-info/ 27 | .ipynb_checkpoints/* 28 | funpymodeling/.ipynb_checkpoints/* 29 | # Distribution / packaging 30 | .Python 31 | build/ 32 | develop-eggs/ 33 | .pytest_cache/ 34 | dist/ 35 | downloads/ 36 | eggs/ 37 | .eggs/ 38 | lib/ 39 | lib64/ 40 | parts/ 41 | sdist/ 42 | var/ 43 | wheels/ 44 | *.egg-info/ 45 | .installed.cfg 46 | *.egg 47 | 48 | # PyInstaller 49 | # Usually these files are written by a python script from a template 50 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 51 | *.manifest 52 | *.spec 53 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://gitlab.com/pycqa/flake8 3 | rev: 3.7.9 4 | hooks: 5 | - id: flake8 6 | name: flake8 except __init__.py 7 | args: [--exclude=__init__.py] 8 | - id: flake8 9 | name: flake8 only __init__.py 10 | args: [--ignore=F401] # ignore imported unused in __init__.py 11 | files: __init__.py 12 | - repo: local 13 | hooks: 14 | - id: pytest 15 | name: Check pytest unit tests pass 16 | entry: make test 17 | language: system -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at {pcasas.biz@gmail.com} . All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | 75 | For answers to common questions about this code of conduct, see 76 | https://www.contributor-covenant.org/faq 77 | -------------------------------------------------------------------------------- /MIT-LICENSE.TXT: -------------------------------------------------------------------------------- 1 | Copyright 2020 Pablo Casas 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: install 2 | 3 | install: 4 | poetry install 5 | 6 | .PHONY: test 7 | 8 | test: 9 | poetry run pytest --pyargs funpymodeling 10 | 11 | .PHONY: check_style 12 | 13 | check_style: 14 | poetry run flake8 --exclude=__init__.py 15 | poetry run flake8 --ignore F401 funpymodeling/__init__.py -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # funPyModeling 2 | A package to help data scientist in Exploratory Data Analysis and Data Preparation for ML models 3 | -------------------------------------------------------------------------------- /funpymodeling/__init__.py: -------------------------------------------------------------------------------- 1 | from .data_prep import todf 2 | from .exploratory import status, corr_pair, num_vars, cat_vars, profiling_num, freq_tbl 3 | from .model_validation import coord_plot 4 | 5 | 6 | __version__ = "0.1.7" 7 | -------------------------------------------------------------------------------- /funpymodeling/data_prep.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | def todf(data): 5 | """ 6 | It converts almost any object to pandas dataframe. It supports: 1D/2D list, 1D/2D arrays, pandas series. If the object containts +2D it returns an error. 7 | Parameters: 8 | ----------- 9 | data: data 10 | 11 | Returns: 12 | -------- 13 | A pandas dataframe. 14 | 15 | Example: 16 | -------- 17 | >> from numpy import array 18 | 19 | # Different case study: 20 | >> list_1d = [11, 12, 5, 2] 21 | >> todf(list_1d) 22 | >> list_2d = [[11, 12, 5, 2], [15,24, 6,10], [10, 8, 12, 5], [12,15,8,6]] 23 | >> todf(list_2d) 24 | >> list_3d = [[[11, 12, 5, 2], [15,24, 6,10], [10, 8, 12, 5], [12,15,8,6]]] 25 | >> todf(list_3d) 26 | >> array_1d = array(list_1d) 27 | >> todf(array_1d) 28 | >> array_2d = array(list_2d) 29 | >> todf(array_2d) 30 | >> pd_df=pd.DataFrame({'v1':[11, 12, 5, 2], 'v2':[15,24, 6,10]}) # ok 31 | >> todf(pd_df) 32 | >> pd_series=pd_df.v1 33 | """ 34 | if isinstance(data, list): 35 | data=np.array(data) 36 | 37 | if(len(data.shape))>2: 38 | raise Exception("I live in flattland! (can't handle objects with more than 2 dimensions)") 39 | 40 | if isinstance(data, pd.Series): 41 | data2=pd.DataFrame({data.name: data}) 42 | elif isinstance(data, np.ndarray): 43 | if(data.shape==1): 44 | data2=pd.DataFrame({'var': data}).convert_dtypes() 45 | else: 46 | data2=pd.DataFrame(data).convert_dtypes() 47 | else: 48 | data2=data 49 | 50 | return data2 51 | 52 | -------------------------------------------------------------------------------- /funpymodeling/exploratory.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from .data_prep import todf 4 | 5 | def status(data): 6 | """ 7 | For each variable it returns: Quantity and percentage of zeros (q_zeros and p_zeros respectevly). Same metrics for NA values (q_NA/p_na), and infinite values (q_inf/p_inf). Last two columns indicates data type and quantity of unique values. 8 | status can be used for EDA or in a data flow to spot errors or take actions based on the result. 9 | 10 | Parameters: 11 | ----------- 12 | data: It can be a dataframe or a single column, 1D or 2D numpy array. It uses the todf() function. 13 | 14 | Returns: 15 | -------- 16 | A pandas dataframe containing the status metrics for each input variable. 17 | 18 | Example: 19 | -------- 20 | >> import seaborn as sns 21 | >> iris = sns.load_dataset('iris') 22 | >> # dataframe as input 23 | >> status(iris) 24 | >> # single columns: 25 | >> status(iris['species']) 26 | """ 27 | data2=todf(data) 28 | 29 | # total de rows 30 | tot_rows=len(data2) 31 | 32 | # total de nan 33 | d2=data2.isnull().sum().reset_index() 34 | d2.columns=['variable', 'q_nan'] 35 | 36 | # percentage of nan 37 | d2[['p_nan']]=d2[['q_nan']]/tot_rows 38 | 39 | # num of zeros 40 | d2['q_zeros']=(data2==0).sum().values 41 | 42 | # perc of zeros 43 | d2['p_zeros']=d2[['q_zeros']]/tot_rows 44 | 45 | # total unique values 46 | d2['unique']=data2.nunique().values 47 | 48 | # get data types per column 49 | d2['type']=[str(x) for x in data2.dtypes.values] 50 | 51 | return(d2) 52 | 53 | 54 | def corr_pair(data, method='pearson'): 55 | """ 56 | Calcuate the correlations among all numeric features. Non-numeric are excluded since it uses the `corr` pandas function. 57 | It's useful to quickly extract those correlated input features and the correlation between the input and the target variable. 58 | 59 | Parameters: 60 | ----------- 61 | data: pandas data containing the variables to calculate the correlation 62 | method: `pearson` as default, same as `corr` function in pandas. 63 | Returns: 64 | -------- 65 | A pandas dataframe containing pairwaise correlation, R and R2 statistcs 66 | 67 | Example: 68 | -------- 69 | >> import seaborn as sns 70 | >> iris = sns.load_dataset('iris') 71 | >> corr_pair(iris) 72 | """ 73 | data2=todf(data) 74 | 75 | d_cor=data2.corr(method) 76 | 77 | d_cor2=d_cor.reset_index() # generates index as column 78 | 79 | d_long=d_cor2.melt(id_vars='index') # to long format, each row 1 var 80 | 81 | d_long.columns=['v1', 'v2', 'R'] 82 | 83 | d_long[['R2']]=d_long[['R']]**2 84 | 85 | d_long2=d_long.query("v1 != v2") # don't need the auto-correlation 86 | 87 | return(d_long2) 88 | 89 | 90 | def num_vars(data, exclude_var=None): 91 | """ 92 | Returns the numeric variable names. Useful to use with pipelines or any other method in which we need to keep numeric variables. It `exclude_var` can be a list with the variable names to skip in the result. Useful when we want to skip the target variable (i.e. in a data transformation). 93 | It's also available for categorical variables in the function `cat_vars()` 94 | Parameters: 95 | ----------- 96 | data: pandas dataframe 97 | exclude_var: list of variable names to exclude from the result 98 | 99 | Returns: 100 | -------- 101 | A list with all the numeric variable names. 102 | 103 | Example: 104 | -------- 105 | >> import seaborn as sns 106 | >> iris = sns.load_dataset('iris') 107 | >> num_vars(iris) 108 | """ 109 | num_v = data.select_dtypes(include=['int64', 'float64']).columns 110 | if exclude_var is not None: 111 | num_v=num_v.drop(exclude_var) 112 | return num_v 113 | 114 | def cat_vars(data, exclude_var=None): 115 | """ 116 | Returns the categoric variable names. Useful to use with pipelines or any other method in which we need to keep categorical variables. It `exclude_var` can be a list with the variable names to skip in the result. Useful when we want to skip the target variable (i.e. in a data transformation). It will include all `object`, `category` and `string` variables. 117 | It's also available for numeric variables in the function `num_vars()` 118 | 119 | Parameters: 120 | ----------- 121 | data: pandas dataframe 122 | exclude_var: list of variable names to exclude from the result 123 | 124 | Returns: 125 | -------- 126 | A list with all the categoric variable names. 127 | 128 | Example: 129 | -------- 130 | >> import seaborn as sns 131 | >> iris = sns.load_dataset('iris') 132 | >> cat_vars(iris) 133 | """ 134 | cat_v = data.select_dtypes(include=['object','category', 'string']).columns 135 | if exclude_var is not None: 136 | cat_v=cat_v.drop(exclude_var) 137 | return cat_v 138 | 139 | 140 | def profiling_num(data): 141 | """ 142 | Get a metric table with many indicators for all numerical variables, automatically skipping the non-numerical variables. Current metrics are: mean, std_dev: standard deviation, all the p_XX: percentile at XX number, skewness, kurtosis, iqr: inter quartile range, variation_coef: the ratio of sd/mean, range_98 is the limit for which the 98% of fall, range_80 similar to range_98 but with 80%. All NA values will be skipped from calculations. 143 | 144 | Parameters: 145 | ----------- 146 | data: pandas series/dataframe, numpy 1D/2D array 147 | 148 | Returns: 149 | -------- 150 | A dataframe in which each row is an input variable, and each column an statistic. 151 | 152 | Example: 153 | -------- 154 | >> import seaborn as sns 155 | >> iris = sns.load_dataset('iris') 156 | >> profiling_num(iris) 157 | """ 158 | 159 | # handling different inputs to dataframe 160 | data=todf(data) 161 | 162 | # explicit keep the num vars 163 | d=data[num_vars(data)] 164 | 165 | des1=pd.DataFrame({'mean':d.mean().transpose(), 166 | 'std_dev':d.std().transpose()}) 167 | 168 | des1['variation_coef']=des1['std_dev']/des1['mean'] 169 | 170 | d_quant=d.quantile([0.01, 0.05, 0.25, 0.5, 0.75, 0.95, 0.99]).transpose().add_prefix('p_') 171 | 172 | des2=des1.join(d_quant, how='outer') 173 | 174 | des_final=des2.copy() 175 | 176 | des_final['variable'] = des_final.index 177 | 178 | des_final=des_final.reset_index(drop=True) 179 | 180 | des_final=des_final[['variable', 'mean', 'std_dev','variation_coef', 'p_0.01', 'p_0.05', 'p_0.25', 'p_0.5', 'p_0.75', 'p_0.95', 'p_0.99']] 181 | 182 | return des_final 183 | 184 | 185 | 186 | def _freq_tbl_logic(var, name): 187 | """ 188 | For internal use. Related to `freq_tbl`. 189 | 190 | Parameters: 191 | ----------- 192 | var: pandas series 193 | name: column name (string) 194 | 195 | Returns: 196 | -------- 197 | Dataframe with the metrics 198 | 199 | Example: 200 | -------- 201 | 202 | """ 203 | cnt=var.value_counts() 204 | df_res=pd.DataFrame({'frequency': var.value_counts(), 'percentage': var.value_counts()/len(var)}) 205 | df_res.reset_index(drop=True) 206 | 207 | df_res[name] = df_res.index 208 | 209 | df_res=df_res.reset_index(drop=True) 210 | 211 | df_res['cumulative_perc'] = df_res.percentage.cumsum()/df_res.percentage.sum() 212 | 213 | df_res=df_res[[name, 'frequency', 'percentage', 'cumulative_perc']] 214 | 215 | return df_res 216 | 217 | 218 | 219 | def freq_tbl(data): 220 | """ 221 | Frequency table for categorical variables. It retrieves the frequency, perrcentage and cummulative percentage for each categorical variables (excluding the numerical ones). 222 | 223 | Parameters: 224 | ----------- 225 | data: pandas series/dataframe, numpy 1D/2D array 226 | 227 | Returns: 228 | -------- 229 | If a single variable is passed, then it returns the table with the results (useful to be used in a processes and take actions based on the result.). 230 | If it contains more than one varible, it will print in the console the result for all the categorical variables (based on cat_vars). 231 | 232 | Example: 233 | -------- 234 | > import seaborn as sns 235 | > tips=sns.load_dataset('tips') 236 | > freq_tbl(tips) 237 | """ 238 | data=todf(data) 239 | 240 | cat_v=cat_vars(data) 241 | if(len(cat_v)==0): 242 | return('No categorical variables to analyze.') 243 | 244 | if(len(cat_v)>1): 245 | for col in cat_v: 246 | print(_freq_tbl_logic(data[col], name=col)) 247 | print('\n----------------------------------------------------------------\n') 248 | else: 249 | # if only 1 column, then return the table for that variable 250 | col=cat_v[0] 251 | return _freq_tbl_logic(data[col], name=col) 252 | 253 | -------------------------------------------------------------------------------- /funpymodeling/model_validation.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from pandas.plotting import parallel_coordinates 4 | from sklearn.preprocessing import MinMaxScaler 5 | import seaborn as sns 6 | import matplotlib.pyplot as plt 7 | 8 | 9 | def coord_plot(data, group_var): 10 | """ 11 | Coordinate plot analysis for clustering models. Also returns the original and the normalized (min-max) variable table. Useful to extract the main features for each cluster according to the variable means. 12 | Parameters: 13 | ----------- 14 | data : Pandas DataFrame containing the variables to analyze the mean across each cluster 15 | group_var : String indicating the clustering variable name 16 | Returns: 17 | -------- 18 | A tuple containing two data frames. The first contains the mean for each category across each value of the group_var. The other data set is similar but it is min-max normalized, range [0-1]. 19 | It also shows the coordinate or parallel plot. 20 | Example: 21 | -------- 22 | >> import seaborn as sns 23 | >> iris = sns.load_dataset('iris') 24 | # If species is the cluster variable: 25 | >> coord_plot(iris, 'species') 26 | """ 27 | # 1- group by cluster, get the means 28 | x_grp=data.groupby(group_var).mean() 29 | x_grp[group_var] = x_grp.index 30 | x_grp=x_grp.reset_index(drop=True) 31 | x_grp # data with the original variables 32 | 33 | # 2- normalizing the data min-max 34 | x_grp_no_tgt=x_grp.drop(group_var, axis=1) 35 | 36 | mm_scaler = MinMaxScaler() 37 | mm_scaler.fit(x_grp_no_tgt) 38 | x_grp_mm=mm_scaler.transform(x_grp_no_tgt) 39 | 40 | # 3- convert to df 41 | df_grp_mm=pd.DataFrame(x_grp_mm, columns=x_grp_no_tgt.columns) 42 | 43 | df_grp_mm[group_var]=x_grp[group_var] # variables escaladas 44 | 45 | # 4- plot 46 | parallel_coordinates(df_grp_mm, group_var, colormap=plt.get_cmap("Dark2")) 47 | plt.xticks(rotation=90) 48 | 49 | return [x_grp, df_grp_mm] -------------------------------------------------------------------------------- /funpymodeling/test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pablo14/funpymodeling/b399804d5981e8873302980627fb6cc4139d1a10/funpymodeling/test/__init__.py -------------------------------------------------------------------------------- /funpymodeling/test/test_funpymodeling.py: -------------------------------------------------------------------------------- 1 | from funpymodeling import __version__ 2 | 3 | 4 | def test_version(): 5 | assert __version__ == '0.1.7' 6 | -------------------------------------------------------------------------------- /notebooks/quick-start_eng_v1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# funpymodeling > Basic usage\n", 8 | "\n", 9 | "Created by Pablo Casas [@pabloc_ds](https://twitter.com/pabloc_ds)\n", 10 | "\n", 11 | "" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 48, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "import pandas as pd\n", 21 | "import matplotlib.pyplot as plt\n", 22 | "import numpy as np\n", 23 | "import seaborn as sns" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 49, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "# Load some dataframes for this practice:\n", 33 | "iris = sns.load_dataset('iris')\n", 34 | "tips = sns.load_dataset('tips')" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "## 1) Exploratory Data Analysis" 42 | ] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "metadata": {}, 47 | "source": [ 48 | "### 1.1) Dataset health `status`" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 4, 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "from funpymodeling.exploratory import status" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": {}, 63 | "source": [ 64 | "Support data frame as input:" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 5, 70 | "metadata": {}, 71 | "outputs": [ 72 | { 73 | "data": { 74 | "text/html": [ 75 | "
\n", 76 | "\n", 89 | "\n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | "
variableq_nanp_nanq_zerosp_zerosuniquetype
0sepal_length00.000.035float64
1sepal_width00.000.023float64
2petal_length00.000.043float64
3petal_width00.000.022float64
4species00.000.03object
\n", 155 | "
" 156 | ], 157 | "text/plain": [ 158 | " variable q_nan p_nan q_zeros p_zeros unique type\n", 159 | "0 sepal_length 0 0.0 0 0.0 35 float64\n", 160 | "1 sepal_width 0 0.0 0 0.0 23 float64\n", 161 | "2 petal_length 0 0.0 0 0.0 43 float64\n", 162 | "3 petal_width 0 0.0 0 0.0 22 float64\n", 163 | "4 species 0 0.0 0 0.0 3 object" 164 | ] 165 | }, 166 | "execution_count": 5, 167 | "metadata": {}, 168 | "output_type": "execute_result" 169 | } 170 | ], 171 | "source": [ 172 | "status(iris) " 173 | ] 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "metadata": {}, 178 | "source": [ 179 | "Supports Pandas series:" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": 6, 185 | "metadata": {}, 186 | "outputs": [ 187 | { 188 | "data": { 189 | "text/html": [ 190 | "
\n", 191 | "\n", 204 | "\n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | "
variableq_nanp_nanq_zerosp_zerosuniquetype
0sepal_width00.000.023float64
\n", 230 | "
" 231 | ], 232 | "text/plain": [ 233 | " variable q_nan p_nan q_zeros p_zeros unique type\n", 234 | "0 sepal_width 0 0.0 0 0.0 23 float64" 235 | ] 236 | }, 237 | "execution_count": 6, 238 | "metadata": {}, 239 | "output_type": "execute_result" 240 | } 241 | ], 242 | "source": [ 243 | "status(iris['sepal_width'])" 244 | ] 245 | }, 246 | { 247 | "cell_type": "markdown", 248 | "metadata": {}, 249 | "source": [ 250 | "Supports 2D numpy array:" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": 7, 256 | "metadata": {}, 257 | "outputs": [], 258 | "source": [ 259 | "tips_np=tips.to_numpy()" 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": 8, 265 | "metadata": {}, 266 | "outputs": [ 267 | { 268 | "data": { 269 | "text/html": [ 270 | "
\n", 271 | "\n", 284 | "\n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | "
variableq_nanp_nanq_zerosp_zerosuniquetype
0000.000.0229float64
1100.000.0123float64
2200.000.02string
3300.000.02string
4400.000.04string
5500.000.02string
6600.000.06Int64
\n", 370 | "
" 371 | ], 372 | "text/plain": [ 373 | " variable q_nan p_nan q_zeros p_zeros unique type\n", 374 | "0 0 0 0.0 0 0.0 229 float64\n", 375 | "1 1 0 0.0 0 0.0 123 float64\n", 376 | "2 2 0 0.0 0 0.0 2 string\n", 377 | "3 3 0 0.0 0 0.0 2 string\n", 378 | "4 4 0 0.0 0 0.0 4 string\n", 379 | "5 5 0 0.0 0 0.0 2 string\n", 380 | "6 6 0 0.0 0 0.0 6 Int64" 381 | ] 382 | }, 383 | "execution_count": 8, 384 | "metadata": {}, 385 | "output_type": "execute_result" 386 | } 387 | ], 388 | "source": [ 389 | "status(tips_np)" 390 | ] 391 | }, 392 | { 393 | "cell_type": "markdown", 394 | "metadata": {}, 395 | "source": [ 396 | "Note: data types form numpy to pandas dataframe are infered by: pandas `convert_dtypes`" 397 | ] 398 | }, 399 | { 400 | "cell_type": "markdown", 401 | "metadata": {}, 402 | "source": [ 403 | "Supports 1D numpy array:" 404 | ] 405 | }, 406 | { 407 | "cell_type": "code", 408 | "execution_count": 9, 409 | "metadata": {}, 410 | "outputs": [ 411 | { 412 | "data": { 413 | "text/html": [ 414 | "
\n", 415 | "\n", 428 | "\n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | "
variableq_nanp_nanq_zerosp_zerosuniquetype
0000.000.04string
\n", 454 | "
" 455 | ], 456 | "text/plain": [ 457 | " variable q_nan p_nan q_zeros p_zeros unique type\n", 458 | "0 0 0 0.0 0 0.0 4 string" 459 | ] 460 | }, 461 | "execution_count": 9, 462 | "metadata": {}, 463 | "output_type": "execute_result" 464 | } 465 | ], 466 | "source": [ 467 | "status(tips_np[:,4])" 468 | ] 469 | }, 470 | { 471 | "cell_type": "code", 472 | "execution_count": null, 473 | "metadata": {}, 474 | "outputs": [], 475 | "source": [ 476 | "\n", 477 | "\n", 478 | "\n", 479 | "\n", 480 | "\n", 481 | "\n", 482 | "\n", 483 | "\n" 484 | ] 485 | }, 486 | { 487 | "cell_type": "markdown", 488 | "metadata": {}, 489 | "source": [ 490 | "### 1.2) Univariate analysis in numeric variables" 491 | ] 492 | }, 493 | { 494 | "cell_type": "code", 495 | "execution_count": 11, 496 | "metadata": {}, 497 | "outputs": [], 498 | "source": [ 499 | "from funpymodeling.exploratory import profiling_num" 500 | ] 501 | }, 502 | { 503 | "cell_type": "markdown", 504 | "metadata": {}, 505 | "source": [ 506 | "`profiling_num` retrieves several statistics for all numeric variables excluding the categorical ones." 507 | ] 508 | }, 509 | { 510 | "cell_type": "markdown", 511 | "metadata": {}, 512 | "source": [ 513 | "Supports dataframe:" 514 | ] 515 | }, 516 | { 517 | "cell_type": "code", 518 | "execution_count": 12, 519 | "metadata": {}, 520 | "outputs": [ 521 | { 522 | "data": { 523 | "text/html": [ 524 | "
\n", 525 | "\n", 538 | "\n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | "
variablemeanstd_devvariation_coefp_0.01p_0.05p_0.25p_0.5p_0.75p_0.95p_0.99
0total_bill19.7859438.9024120.4499367.259.557513.347517.79524.127538.061048.2270
1tip2.9982791.3836380.4614781.001.44002.00002.9003.56255.19557.2145
2size2.5696720.9511000.3701251.002.00002.00002.0003.00004.00006.0000
\n", 600 | "
" 601 | ], 602 | "text/plain": [ 603 | " variable mean std_dev variation_coef p_0.01 p_0.05 p_0.25 \\\n", 604 | "0 total_bill 19.785943 8.902412 0.449936 7.25 9.5575 13.3475 \n", 605 | "1 tip 2.998279 1.383638 0.461478 1.00 1.4400 2.0000 \n", 606 | "2 size 2.569672 0.951100 0.370125 1.00 2.0000 2.0000 \n", 607 | "\n", 608 | " p_0.5 p_0.75 p_0.95 p_0.99 \n", 609 | "0 17.795 24.1275 38.0610 48.2270 \n", 610 | "1 2.900 3.5625 5.1955 7.2145 \n", 611 | "2 2.000 3.0000 4.0000 6.0000 " 612 | ] 613 | }, 614 | "execution_count": 12, 615 | "metadata": {}, 616 | "output_type": "execute_result" 617 | } 618 | ], 619 | "source": [ 620 | "profiling_num(tips)" 621 | ] 622 | }, 623 | { 624 | "cell_type": "markdown", 625 | "metadata": {}, 626 | "source": [ 627 | "Also numpy as before:" 628 | ] 629 | }, 630 | { 631 | "cell_type": "code", 632 | "execution_count": 13, 633 | "metadata": {}, 634 | "outputs": [ 635 | { 636 | "data": { 637 | "text/html": [ 638 | "
\n", 639 | "\n", 652 | "\n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | "
variablemeanstd_devvariation_coefp_0.01p_0.05p_0.25p_0.5p_0.75p_0.95p_0.99
0019.7859438.9024120.4499367.259.557513.347517.79524.127538.06148.227
112.9982791.3836380.46147811.4422.93.56255.19557.2145
262.5696720.9511000.3701251222346
\n", 714 | "
" 715 | ], 716 | "text/plain": [ 717 | " variable mean std_dev variation_coef p_0.01 p_0.05 p_0.25 \\\n", 718 | "0 0 19.785943 8.902412 0.449936 7.25 9.5575 13.3475 \n", 719 | "1 1 2.998279 1.383638 0.461478 1 1.44 2 \n", 720 | "2 6 2.569672 0.951100 0.370125 1 2 2 \n", 721 | "\n", 722 | " p_0.5 p_0.75 p_0.95 p_0.99 \n", 723 | "0 17.795 24.1275 38.061 48.227 \n", 724 | "1 2.9 3.5625 5.1955 7.2145 \n", 725 | "2 2 3 4 6 " 726 | ] 727 | }, 728 | "execution_count": 13, 729 | "metadata": {}, 730 | "output_type": "execute_result" 731 | } 732 | ], 733 | "source": [ 734 | "profiling_num(tips_np)" 735 | ] 736 | }, 737 | { 738 | "cell_type": "markdown", 739 | "metadata": {}, 740 | "source": [ 741 | "Pandas series & 1D array:" 742 | ] 743 | }, 744 | { 745 | "cell_type": "code", 746 | "execution_count": 14, 747 | "metadata": {}, 748 | "outputs": [ 749 | { 750 | "data": { 751 | "text/html": [ 752 | "
\n", 753 | "\n", 766 | "\n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | "
variablemeanstd_devvariation_coefp_0.01p_0.05p_0.25p_0.5p_0.75p_0.95p_0.99
0total_bill19.7859438.9024120.4499367.259.557513.347517.79524.127538.06148.227
\n", 800 | "
" 801 | ], 802 | "text/plain": [ 803 | " variable mean std_dev variation_coef p_0.01 p_0.05 p_0.25 \\\n", 804 | "0 total_bill 19.785943 8.902412 0.449936 7.25 9.5575 13.3475 \n", 805 | "\n", 806 | " p_0.5 p_0.75 p_0.95 p_0.99 \n", 807 | "0 17.795 24.1275 38.061 48.227 " 808 | ] 809 | }, 810 | "execution_count": 14, 811 | "metadata": {}, 812 | "output_type": "execute_result" 813 | } 814 | ], 815 | "source": [ 816 | "profiling_num(tips['total_bill'])" 817 | ] 818 | }, 819 | { 820 | "cell_type": "code", 821 | "execution_count": 15, 822 | "metadata": {}, 823 | "outputs": [ 824 | { 825 | "data": { 826 | "text/html": [ 827 | "
\n", 828 | "\n", 841 | "\n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | "
variablemeanstd_devvariation_coefp_0.01p_0.05p_0.25p_0.5p_0.75p_0.95p_0.99
0019.7859438.9024120.4499367.259.557513.347517.79524.127538.06148.227
\n", 875 | "
" 876 | ], 877 | "text/plain": [ 878 | " variable mean std_dev variation_coef p_0.01 p_0.05 p_0.25 \\\n", 879 | "0 0 19.785943 8.902412 0.449936 7.25 9.5575 13.3475 \n", 880 | "\n", 881 | " p_0.5 p_0.75 p_0.95 p_0.99 \n", 882 | "0 17.795 24.1275 38.061 48.227 " 883 | ] 884 | }, 885 | "execution_count": 15, 886 | "metadata": {}, 887 | "output_type": "execute_result" 888 | } 889 | ], 890 | "source": [ 891 | "profiling_num(tips_np[:,0])" 892 | ] 893 | }, 894 | { 895 | "cell_type": "code", 896 | "execution_count": null, 897 | "metadata": {}, 898 | "outputs": [], 899 | "source": [ 900 | "\n", 901 | "\n", 902 | "\n", 903 | "\n", 904 | "\n", 905 | "\n", 906 | "\n", 907 | "\n", 908 | "\n" 909 | ] 910 | }, 911 | { 912 | "cell_type": "markdown", 913 | "metadata": {}, 914 | "source": [ 915 | "### 1.3) Univariate analysis in categorical variables" 916 | ] 917 | }, 918 | { 919 | "cell_type": "code", 920 | "execution_count": 16, 921 | "metadata": {}, 922 | "outputs": [], 923 | "source": [ 924 | "from funpymodeling.exploratory import freq_tbl" 925 | ] 926 | }, 927 | { 928 | "cell_type": "markdown", 929 | "metadata": {}, 930 | "source": [ 931 | "It retrieves several statistics related to categorical variablees, such as frequecnyt, percentage an cumulative percentage.\n", 932 | "\n", 933 | "It will run for all categorical variables excluding all the other ones." 934 | ] 935 | }, 936 | { 937 | "cell_type": "markdown", 938 | "metadata": {}, 939 | "source": [ 940 | "Just like the others, it supports: pandas dataframe, pandas series and 1D/2D numpy arrays" 941 | ] 942 | }, 943 | { 944 | "cell_type": "code", 945 | "execution_count": 17, 946 | "metadata": {}, 947 | "outputs": [ 948 | { 949 | "name": "stdout", 950 | "output_type": "stream", 951 | "text": [ 952 | " sex frequency percentage cumulative_perc\n", 953 | "0 Male 157 0.643443 0.643443\n", 954 | "1 Female 87 0.356557 1.000000\n", 955 | "\n", 956 | "----------------------------------------------------------------\n", 957 | "\n", 958 | " smoker frequency percentage cumulative_perc\n", 959 | "0 No 151 0.618852 0.618852\n", 960 | "1 Yes 93 0.381148 1.000000\n", 961 | "\n", 962 | "----------------------------------------------------------------\n", 963 | "\n", 964 | " day frequency percentage cumulative_perc\n", 965 | "0 Sat 87 0.356557 0.356557\n", 966 | "1 Sun 76 0.311475 0.668033\n", 967 | "2 Thur 62 0.254098 0.922131\n", 968 | "3 Fri 19 0.077869 1.000000\n", 969 | "\n", 970 | "----------------------------------------------------------------\n", 971 | "\n", 972 | " time frequency percentage cumulative_perc\n", 973 | "0 Dinner 176 0.721311 0.721311\n", 974 | "1 Lunch 68 0.278689 1.000000\n", 975 | "\n", 976 | "----------------------------------------------------------------\n", 977 | "\n" 978 | ] 979 | } 980 | ], 981 | "source": [ 982 | "freq_tbl(tips)" 983 | ] 984 | }, 985 | { 986 | "cell_type": "markdown", 987 | "metadata": {}, 988 | "source": [ 989 | "If 1 variable is provided, it returns the table associated to that variable so we can use in our data pipeline:" 990 | ] 991 | }, 992 | { 993 | "cell_type": "code", 994 | "execution_count": 18, 995 | "metadata": {}, 996 | "outputs": [ 997 | { 998 | "data": { 999 | "text/html": [ 1000 | "
\n", 1001 | "\n", 1014 | "\n", 1015 | " \n", 1016 | " \n", 1017 | " \n", 1018 | " \n", 1019 | " \n", 1020 | " \n", 1021 | " \n", 1022 | " \n", 1023 | " \n", 1024 | " \n", 1025 | " \n", 1026 | " \n", 1027 | " \n", 1028 | " \n", 1029 | " \n", 1030 | " \n", 1031 | " \n", 1032 | " \n", 1033 | " \n", 1034 | " \n", 1035 | " \n", 1036 | " \n", 1037 | " \n", 1038 | " \n", 1039 | " \n", 1040 | " \n", 1041 | " \n", 1042 | " \n", 1043 | " \n", 1044 | " \n", 1045 | " \n", 1046 | " \n", 1047 | " \n", 1048 | " \n", 1049 | " \n", 1050 | " \n", 1051 | " \n", 1052 | " \n", 1053 | " \n", 1054 | "
dayfrequencypercentagecumulative_perc
0Sat870.3565570.356557
1Sun760.3114750.668033
2Thur620.2540980.922131
3Fri190.0778691.000000
\n", 1055 | "
" 1056 | ], 1057 | "text/plain": [ 1058 | " day frequency percentage cumulative_perc\n", 1059 | "0 Sat 87 0.356557 0.356557\n", 1060 | "1 Sun 76 0.311475 0.668033\n", 1061 | "2 Thur 62 0.254098 0.922131\n", 1062 | "3 Fri 19 0.077869 1.000000" 1063 | ] 1064 | }, 1065 | "execution_count": 18, 1066 | "metadata": {}, 1067 | "output_type": "execute_result" 1068 | } 1069 | ], 1070 | "source": [ 1071 | "day_freq=freq_tbl(tips['day'])\n", 1072 | "\n", 1073 | "day_freq" 1074 | ] 1075 | }, 1076 | { 1077 | "cell_type": "markdown", 1078 | "metadata": {}, 1079 | "source": [ 1080 | "Days with low representativity (30%):" 1081 | ] 1082 | }, 1083 | { 1084 | "cell_type": "code", 1085 | "execution_count": 19, 1086 | "metadata": {}, 1087 | "outputs": [ 1088 | { 1089 | "data": { 1090 | "text/html": [ 1091 | "
\n", 1092 | "\n", 1105 | "\n", 1106 | " \n", 1107 | " \n", 1108 | " \n", 1109 | " \n", 1110 | " \n", 1111 | " \n", 1112 | " \n", 1113 | " \n", 1114 | " \n", 1115 | " \n", 1116 | " \n", 1117 | " \n", 1118 | " \n", 1119 | " \n", 1120 | " \n", 1121 | " \n", 1122 | " \n", 1123 | " \n", 1124 | " \n", 1125 | " \n", 1126 | " \n", 1127 | " \n", 1128 | " \n", 1129 | " \n", 1130 | " \n", 1131 | "
dayfrequencypercentagecumulative_perc
0Sat870.3565570.356557
1Sun760.3114750.668033
\n", 1132 | "
" 1133 | ], 1134 | "text/plain": [ 1135 | " day frequency percentage cumulative_perc\n", 1136 | "0 Sat 87 0.356557 0.356557\n", 1137 | "1 Sun 76 0.311475 0.668033" 1138 | ] 1139 | }, 1140 | "execution_count": 19, 1141 | "metadata": {}, 1142 | "output_type": "execute_result" 1143 | } 1144 | ], 1145 | "source": [ 1146 | "day_freq[day_freq['percentage']>0.3]" 1147 | ] 1148 | }, 1149 | { 1150 | "cell_type": "code", 1151 | "execution_count": 20, 1152 | "metadata": {}, 1153 | "outputs": [ 1154 | { 1155 | "data": { 1156 | "text/plain": [ 1157 | "0 Sat\n", 1158 | "1 Sun\n", 1159 | "Name: day, dtype: category\n", 1160 | "Categories (4, object): ['Thur', 'Fri', 'Sat', 'Sun']" 1161 | ] 1162 | }, 1163 | "execution_count": 20, 1164 | "metadata": {}, 1165 | "output_type": "execute_result" 1166 | } 1167 | ], 1168 | "source": [ 1169 | "day_freq[day_freq['percentage']>0.3]['day']" 1170 | ] 1171 | }, 1172 | { 1173 | "cell_type": "code", 1174 | "execution_count": null, 1175 | "metadata": {}, 1176 | "outputs": [], 1177 | "source": [ 1178 | "\n", 1179 | "\n", 1180 | "\n", 1181 | "\n", 1182 | "\n", 1183 | "\n", 1184 | "\n", 1185 | "\n", 1186 | "\n" 1187 | ] 1188 | }, 1189 | { 1190 | "cell_type": "markdown", 1191 | "metadata": {}, 1192 | "source": [ 1193 | "### 1.4) Pairwaise correlation analysis" 1194 | ] 1195 | }, 1196 | { 1197 | "cell_type": "code", 1198 | "execution_count": 21, 1199 | "metadata": {}, 1200 | "outputs": [], 1201 | "source": [ 1202 | "from funpymodeling.exploratory import corr_pair" 1203 | ] 1204 | }, 1205 | { 1206 | "cell_type": "markdown", 1207 | "metadata": {}, 1208 | "source": [ 1209 | "A wrapper around `corr` of pandas that allow us to quickly filter most important variables, or not. \n", 1210 | "\n", 1211 | "Useful in EDA and when doing the features pre-selection before creating the predictive model." 1212 | ] 1213 | }, 1214 | { 1215 | "cell_type": "code", 1216 | "execution_count": 22, 1217 | "metadata": {}, 1218 | "outputs": [ 1219 | { 1220 | "data": { 1221 | "text/html": [ 1222 | "
\n", 1223 | "\n", 1236 | "\n", 1237 | " \n", 1238 | " \n", 1239 | " \n", 1240 | " \n", 1241 | " \n", 1242 | " \n", 1243 | " \n", 1244 | " \n", 1245 | " \n", 1246 | " \n", 1247 | " \n", 1248 | " \n", 1249 | " \n", 1250 | " \n", 1251 | " \n", 1252 | " \n", 1253 | " \n", 1254 | " \n", 1255 | " \n", 1256 | " \n", 1257 | " \n", 1258 | " \n", 1259 | " \n", 1260 | " \n", 1261 | " \n", 1262 | " \n", 1263 | " \n", 1264 | " \n", 1265 | " \n", 1266 | " \n", 1267 | " \n", 1268 | " \n", 1269 | " \n", 1270 | " \n", 1271 | " \n", 1272 | " \n", 1273 | " \n", 1274 | " \n", 1275 | " \n", 1276 | " \n", 1277 | " \n", 1278 | " \n", 1279 | " \n", 1280 | " \n", 1281 | " \n", 1282 | " \n", 1283 | " \n", 1284 | " \n", 1285 | " \n", 1286 | " \n", 1287 | " \n", 1288 | " \n", 1289 | " \n", 1290 | " \n", 1291 | " \n", 1292 | " \n", 1293 | " \n", 1294 | " \n", 1295 | " \n", 1296 | " \n", 1297 | " \n", 1298 | " \n", 1299 | " \n", 1300 | " \n", 1301 | " \n", 1302 | " \n", 1303 | " \n", 1304 | " \n", 1305 | " \n", 1306 | " \n", 1307 | " \n", 1308 | " \n", 1309 | " \n", 1310 | " \n", 1311 | " \n", 1312 | " \n", 1313 | " \n", 1314 | " \n", 1315 | " \n", 1316 | " \n", 1317 | " \n", 1318 | " \n", 1319 | " \n", 1320 | " \n", 1321 | " \n", 1322 | " \n", 1323 | " \n", 1324 | " \n", 1325 | " \n", 1326 | " \n", 1327 | " \n", 1328 | " \n", 1329 | " \n", 1330 | " \n", 1331 | " \n", 1332 | " \n", 1333 | " \n", 1334 | " \n", 1335 | " \n", 1336 | " \n", 1337 | " \n", 1338 | " \n", 1339 | " \n", 1340 | " \n", 1341 | " \n", 1342 | " \n", 1343 | " \n", 1344 | " \n", 1345 | " \n", 1346 | " \n", 1347 | " \n", 1348 | " \n", 1349 | " \n", 1350 | " \n", 1351 | "
variableq_nanp_nanq_zerosp_zerosuniquetype
0carat00.000.000000273float64
1cut00.000.0000005object
2color00.000.0000007object
3clarity00.000.0000008object
4depth00.000.000000184float64
5table00.000.000000127float64
6price00.000.00000011602int64
7x00.080.000148554float64
8y00.070.000130552float64
9z00.0200.000371375float64
\n", 1352 | "
" 1353 | ], 1354 | "text/plain": [ 1355 | " variable q_nan p_nan q_zeros p_zeros unique type\n", 1356 | "0 carat 0 0.0 0 0.000000 273 float64\n", 1357 | "1 cut 0 0.0 0 0.000000 5 object\n", 1358 | "2 color 0 0.0 0 0.000000 7 object\n", 1359 | "3 clarity 0 0.0 0 0.000000 8 object\n", 1360 | "4 depth 0 0.0 0 0.000000 184 float64\n", 1361 | "5 table 0 0.0 0 0.000000 127 float64\n", 1362 | "6 price 0 0.0 0 0.000000 11602 int64\n", 1363 | "7 x 0 0.0 8 0.000148 554 float64\n", 1364 | "8 y 0 0.0 7 0.000130 552 float64\n", 1365 | "9 z 0 0.0 20 0.000371 375 float64" 1366 | ] 1367 | }, 1368 | "execution_count": 22, 1369 | "metadata": {}, 1370 | "output_type": "execute_result" 1371 | } 1372 | ], 1373 | "source": [ 1374 | "diamonds = sns.load_dataset('diamonds')\n", 1375 | "\n", 1376 | "status(diamonds)" 1377 | ] 1378 | }, 1379 | { 1380 | "cell_type": "code", 1381 | "execution_count": 23, 1382 | "metadata": {}, 1383 | "outputs": [ 1384 | { 1385 | "data": { 1386 | "text/html": [ 1387 | "
\n", 1388 | "\n", 1401 | "\n", 1402 | " \n", 1403 | " \n", 1404 | " \n", 1405 | " \n", 1406 | " \n", 1407 | " \n", 1408 | " \n", 1409 | " \n", 1410 | " \n", 1411 | " \n", 1412 | " \n", 1413 | " \n", 1414 | " \n", 1415 | " \n", 1416 | " \n", 1417 | " \n", 1418 | " \n", 1419 | " \n", 1420 | " \n", 1421 | " \n", 1422 | " \n", 1423 | " \n", 1424 | " \n", 1425 | " \n", 1426 | " \n", 1427 | " \n", 1428 | " \n", 1429 | " \n", 1430 | " \n", 1431 | " \n", 1432 | " \n", 1433 | " \n", 1434 | " \n", 1435 | " \n", 1436 | " \n", 1437 | " \n", 1438 | " \n", 1439 | " \n", 1440 | " \n", 1441 | " \n", 1442 | " \n", 1443 | " \n", 1444 | " \n", 1445 | " \n", 1446 | " \n", 1447 | " \n", 1448 | " \n", 1449 | " \n", 1450 | " \n", 1451 | " \n", 1452 | " \n", 1453 | " \n", 1454 | " \n", 1455 | " \n", 1456 | " \n", 1457 | " \n", 1458 | " \n", 1459 | " \n", 1460 | " \n", 1461 | " \n", 1462 | " \n", 1463 | " \n", 1464 | " \n", 1465 | " \n", 1466 | " \n", 1467 | " \n", 1468 | " \n", 1469 | " \n", 1470 | " \n", 1471 | " \n", 1472 | " \n", 1473 | " \n", 1474 | " \n", 1475 | " \n", 1476 | " \n", 1477 | " \n", 1478 | " \n", 1479 | " \n", 1480 | " \n", 1481 | " \n", 1482 | " \n", 1483 | "
v1v2RR2
1depthcarat0.0282240.000797
2tablecarat0.1816180.032985
3pricecarat0.9215910.849331
4xcarat0.9750940.950809
5ycarat0.9517220.905775
6zcarat0.9533870.908947
7caratdepth0.0282240.000797
9tabledepth-0.2957790.087485
10pricedepth-0.0106470.000113
11xdepth-0.0252890.000640
\n", 1484 | "
" 1485 | ], 1486 | "text/plain": [ 1487 | " v1 v2 R R2\n", 1488 | "1 depth carat 0.028224 0.000797\n", 1489 | "2 table carat 0.181618 0.032985\n", 1490 | "3 price carat 0.921591 0.849331\n", 1491 | "4 x carat 0.975094 0.950809\n", 1492 | "5 y carat 0.951722 0.905775\n", 1493 | "6 z carat 0.953387 0.908947\n", 1494 | "7 carat depth 0.028224 0.000797\n", 1495 | "9 table depth -0.295779 0.087485\n", 1496 | "10 price depth -0.010647 0.000113\n", 1497 | "11 x depth -0.025289 0.000640" 1498 | ] 1499 | }, 1500 | "execution_count": 23, 1501 | "metadata": {}, 1502 | "output_type": "execute_result" 1503 | } 1504 | ], 1505 | "source": [ 1506 | "res=corr_pair(diamonds)\n", 1507 | "\n", 1508 | "res.head(10)" 1509 | ] 1510 | }, 1511 | { 1512 | "cell_type": "markdown", 1513 | "metadata": {}, 1514 | "source": [ 1515 | "If `price` is the target..." 1516 | ] 1517 | }, 1518 | { 1519 | "cell_type": "markdown", 1520 | "metadata": {}, 1521 | "source": [ 1522 | "Feature analysis for predictive modeling:" 1523 | ] 1524 | }, 1525 | { 1526 | "cell_type": "code", 1527 | "execution_count": 24, 1528 | "metadata": {}, 1529 | "outputs": [], 1530 | "source": [ 1531 | "res_target_ordered=res[res['v2']=='price'].sort_values('R2', ascending=False)" 1532 | ] 1533 | }, 1534 | { 1535 | "cell_type": "code", 1536 | "execution_count": null, 1537 | "metadata": {}, 1538 | "outputs": [], 1539 | "source": [ 1540 | "\n", 1541 | "\n", 1542 | "\n", 1543 | "\n" 1544 | ] 1545 | }, 1546 | { 1547 | "cell_type": "markdown", 1548 | "metadata": {}, 1549 | "source": [ 1550 | "Get top 3 most correlated features:" 1551 | ] 1552 | }, 1553 | { 1554 | "cell_type": "code", 1555 | "execution_count": 25, 1556 | "metadata": {}, 1557 | "outputs": [ 1558 | { 1559 | "data": { 1560 | "text/plain": [ 1561 | "21 carat\n", 1562 | "25 x\n", 1563 | "26 y\n", 1564 | "Name: v1, dtype: object" 1565 | ] 1566 | }, 1567 | "execution_count": 25, 1568 | "metadata": {}, 1569 | "output_type": "execute_result" 1570 | } 1571 | ], 1572 | "source": [ 1573 | "# Top 3:\n", 1574 | "top_vars=res_target_ordered[0:3]['v1']\n", 1575 | "top_vars" 1576 | ] 1577 | }, 1578 | { 1579 | "cell_type": "code", 1580 | "execution_count": null, 1581 | "metadata": {}, 1582 | "outputs": [], 1583 | "source": [ 1584 | "\n", 1585 | "\n", 1586 | "\n", 1587 | "\n" 1588 | ] 1589 | }, 1590 | { 1591 | "cell_type": "markdown", 1592 | "metadata": {}, 1593 | "source": [ 1594 | "On the opposite: Delete less relevant features (threshold R2 < 0.05)" 1595 | ] 1596 | }, 1597 | { 1598 | "cell_type": "code", 1599 | "execution_count": 26, 1600 | "metadata": {}, 1601 | "outputs": [ 1602 | { 1603 | "data": { 1604 | "text/plain": [ 1605 | "23 table\n", 1606 | "22 depth\n", 1607 | "Name: v1, dtype: object" 1608 | ] 1609 | }, 1610 | "execution_count": 26, 1611 | "metadata": {}, 1612 | "output_type": "execute_result" 1613 | } 1614 | ], 1615 | "source": [ 1616 | "res_target_ordered[res_target_ordered['R2']<0.05]['v1']" 1617 | ] 1618 | }, 1619 | { 1620 | "cell_type": "code", 1621 | "execution_count": null, 1622 | "metadata": {}, 1623 | "outputs": [], 1624 | "source": [ 1625 | "\n", 1626 | "\n", 1627 | "\n", 1628 | "\n", 1629 | "\n", 1630 | "\n", 1631 | "\n", 1632 | "\n", 1633 | "\n" 1634 | ] 1635 | }, 1636 | { 1637 | "cell_type": "markdown", 1638 | "metadata": {}, 1639 | "source": [ 1640 | "### 1.5) Get numeric and categorical var names" 1641 | ] 1642 | }, 1643 | { 1644 | "cell_type": "markdown", 1645 | "metadata": {}, 1646 | "source": [ 1647 | "Definitely, this is not fancy but useful internally and used with sklearn pipelines." 1648 | ] 1649 | }, 1650 | { 1651 | "cell_type": "code", 1652 | "execution_count": 27, 1653 | "metadata": {}, 1654 | "outputs": [], 1655 | "source": [ 1656 | "from funpymodeling.exploratory import cat_vars, num_vars" 1657 | ] 1658 | }, 1659 | { 1660 | "cell_type": "code", 1661 | "execution_count": 28, 1662 | "metadata": {}, 1663 | "outputs": [ 1664 | { 1665 | "data": { 1666 | "text/html": [ 1667 | "
\n", 1668 | "\n", 1681 | "\n", 1682 | " \n", 1683 | " \n", 1684 | " \n", 1685 | " \n", 1686 | " \n", 1687 | " \n", 1688 | " \n", 1689 | " \n", 1690 | " \n", 1691 | " \n", 1692 | " \n", 1693 | " \n", 1694 | " \n", 1695 | " \n", 1696 | " \n", 1697 | " \n", 1698 | " \n", 1699 | " \n", 1700 | " \n", 1701 | " \n", 1702 | " \n", 1703 | " \n", 1704 | " \n", 1705 | " \n", 1706 | " \n", 1707 | " \n", 1708 | " \n", 1709 | " \n", 1710 | " \n", 1711 | " \n", 1712 | " \n", 1713 | " \n", 1714 | " \n", 1715 | " \n", 1716 | " \n", 1717 | " \n", 1718 | " \n", 1719 | " \n", 1720 | " \n", 1721 | " \n", 1722 | " \n", 1723 | " \n", 1724 | " \n", 1725 | " \n", 1726 | " \n", 1727 | " \n", 1728 | " \n", 1729 | " \n", 1730 | " \n", 1731 | " \n", 1732 | " \n", 1733 | " \n", 1734 | " \n", 1735 | " \n", 1736 | " \n", 1737 | " \n", 1738 | " \n", 1739 | " \n", 1740 | " \n", 1741 | " \n", 1742 | " \n", 1743 | " \n", 1744 | " \n", 1745 | " \n", 1746 | " \n", 1747 | " \n", 1748 | " \n", 1749 | " \n", 1750 | " \n", 1751 | " \n", 1752 | " \n", 1753 | " \n", 1754 | " \n", 1755 | " \n", 1756 | " \n", 1757 | " \n", 1758 | " \n", 1759 | " \n", 1760 | " \n", 1761 | " \n", 1762 | " \n", 1763 | " \n", 1764 | " \n", 1765 | " \n", 1766 | "
variableq_nanp_nanq_zerosp_zerosuniquetype
0total_bill00.000.0229float64
1tip00.000.0123float64
2sex00.000.02category
3smoker00.000.02category
4day00.000.04category
5time00.000.02category
6size00.000.06int64
\n", 1767 | "
" 1768 | ], 1769 | "text/plain": [ 1770 | " variable q_nan p_nan q_zeros p_zeros unique type\n", 1771 | "0 total_bill 0 0.0 0 0.0 229 float64\n", 1772 | "1 tip 0 0.0 0 0.0 123 float64\n", 1773 | "2 sex 0 0.0 0 0.0 2 category\n", 1774 | "3 smoker 0 0.0 0 0.0 2 category\n", 1775 | "4 day 0 0.0 0 0.0 4 category\n", 1776 | "5 time 0 0.0 0 0.0 2 category\n", 1777 | "6 size 0 0.0 0 0.0 6 int64" 1778 | ] 1779 | }, 1780 | "execution_count": 28, 1781 | "metadata": {}, 1782 | "output_type": "execute_result" 1783 | } 1784 | ], 1785 | "source": [ 1786 | "status(tips)" 1787 | ] 1788 | }, 1789 | { 1790 | "cell_type": "markdown", 1791 | "metadata": {}, 1792 | "source": [ 1793 | "Retrieve categorical var names:" 1794 | ] 1795 | }, 1796 | { 1797 | "cell_type": "code", 1798 | "execution_count": 29, 1799 | "metadata": {}, 1800 | "outputs": [ 1801 | { 1802 | "data": { 1803 | "text/plain": [ 1804 | "Index(['sex', 'smoker', 'day', 'time'], dtype='object')" 1805 | ] 1806 | }, 1807 | "execution_count": 29, 1808 | "metadata": {}, 1809 | "output_type": "execute_result" 1810 | } 1811 | ], 1812 | "source": [ 1813 | "cat_vars(tips)" 1814 | ] 1815 | }, 1816 | { 1817 | "cell_type": "markdown", 1818 | "metadata": {}, 1819 | "source": [ 1820 | "Retrieve numerical var names:" 1821 | ] 1822 | }, 1823 | { 1824 | "cell_type": "code", 1825 | "execution_count": 30, 1826 | "metadata": {}, 1827 | "outputs": [ 1828 | { 1829 | "data": { 1830 | "text/plain": [ 1831 | "Index(['total_bill', 'tip', 'size'], dtype='object')" 1832 | ] 1833 | }, 1834 | "execution_count": 30, 1835 | "metadata": {}, 1836 | "output_type": "execute_result" 1837 | } 1838 | ], 1839 | "source": [ 1840 | "num_vars(tips)" 1841 | ] 1842 | }, 1843 | { 1844 | "cell_type": "code", 1845 | "execution_count": null, 1846 | "metadata": {}, 1847 | "outputs": [], 1848 | "source": [ 1849 | "\n", 1850 | "\n", 1851 | "\n", 1852 | "\n", 1853 | "\n", 1854 | "\n", 1855 | "\n", 1856 | "\n", 1857 | "\n", 1858 | "\n" 1859 | ] 1860 | }, 1861 | { 1862 | "cell_type": "markdown", 1863 | "metadata": {}, 1864 | "source": [ 1865 | "## 2) Data Preparation" 1866 | ] 1867 | }, 1868 | { 1869 | "cell_type": "markdown", 1870 | "metadata": {}, 1871 | "source": [ 1872 | "### 2.1) Convert \"almost-everything\" into a pandas dataframe" 1873 | ] 1874 | }, 1875 | { 1876 | "cell_type": "code", 1877 | "execution_count": 31, 1878 | "metadata": {}, 1879 | "outputs": [], 1880 | "source": [ 1881 | "from funpymodeling.data_prep import todf\n", 1882 | "\n", 1883 | "import numpy as np" 1884 | ] 1885 | }, 1886 | { 1887 | "cell_type": "markdown", 1888 | "metadata": {}, 1889 | "source": [ 1890 | "Note: Yes, under certain scenarios is not convenient due to performance reasons. But many scenarios we need/want to test or do a quick explore.\n", 1891 | "\n", 1892 | "`todf` is used as the entry point in many functions of `funPyModeling`." 1893 | ] 1894 | }, 1895 | { 1896 | "cell_type": "code", 1897 | "execution_count": 32, 1898 | "metadata": {}, 1899 | "outputs": [ 1900 | { 1901 | "data": { 1902 | "text/html": [ 1903 | "
\n", 1904 | "\n", 1917 | "\n", 1918 | " \n", 1919 | " \n", 1920 | " \n", 1921 | " \n", 1922 | " \n", 1923 | " \n", 1924 | " \n", 1925 | " \n", 1926 | " \n", 1927 | " \n", 1928 | " \n", 1929 | " \n", 1930 | " \n", 1931 | " \n", 1932 | " \n", 1933 | " \n", 1934 | " \n", 1935 | " \n", 1936 | " \n", 1937 | " \n", 1938 | " \n", 1939 | " \n", 1940 | " \n", 1941 | " \n", 1942 | "
0
011
112
25
32
\n", 1943 | "
" 1944 | ], 1945 | "text/plain": [ 1946 | " 0\n", 1947 | "0 11\n", 1948 | "1 12\n", 1949 | "2 5\n", 1950 | "3 2" 1951 | ] 1952 | }, 1953 | "execution_count": 32, 1954 | "metadata": {}, 1955 | "output_type": "execute_result" 1956 | } 1957 | ], 1958 | "source": [ 1959 | "# 1D List\n", 1960 | "list_1d = [11, 12, 5, 2] \n", 1961 | "todf(list_1d)" 1962 | ] 1963 | }, 1964 | { 1965 | "cell_type": "code", 1966 | "execution_count": 33, 1967 | "metadata": {}, 1968 | "outputs": [ 1969 | { 1970 | "data": { 1971 | "text/html": [ 1972 | "
\n", 1973 | "\n", 1986 | "\n", 1987 | " \n", 1988 | " \n", 1989 | " \n", 1990 | " \n", 1991 | " \n", 1992 | " \n", 1993 | " \n", 1994 | " \n", 1995 | " \n", 1996 | " \n", 1997 | " \n", 1998 | " \n", 1999 | " \n", 2000 | " \n", 2001 | " \n", 2002 | " \n", 2003 | " \n", 2004 | " \n", 2005 | " \n", 2006 | " \n", 2007 | " \n", 2008 | " \n", 2009 | " \n", 2010 | " \n", 2011 | " \n", 2012 | " \n", 2013 | " \n", 2014 | " \n", 2015 | " \n", 2016 | " \n", 2017 | " \n", 2018 | " \n", 2019 | " \n", 2020 | " \n", 2021 | " \n", 2022 | " \n", 2023 | " \n", 2024 | " \n", 2025 | " \n", 2026 | "
0123
0111252
11524610
2108125
3121586
\n", 2027 | "
" 2028 | ], 2029 | "text/plain": [ 2030 | " 0 1 2 3\n", 2031 | "0 11 12 5 2\n", 2032 | "1 15 24 6 10\n", 2033 | "2 10 8 12 5\n", 2034 | "3 12 15 8 6" 2035 | ] 2036 | }, 2037 | "execution_count": 33, 2038 | "metadata": {}, 2039 | "output_type": "execute_result" 2040 | } 2041 | ], 2042 | "source": [ 2043 | "# 2D List\n", 2044 | "list_2d = [[11, 12, 5, 2], [15,24, 6,10], [10, 8, 12, 5], [12,15,8,6]]\n", 2045 | "todf(list_2d)" 2046 | ] 2047 | }, 2048 | { 2049 | "cell_type": "code", 2050 | "execution_count": 34, 2051 | "metadata": {}, 2052 | "outputs": [ 2053 | { 2054 | "data": { 2055 | "text/html": [ 2056 | "
\n", 2057 | "\n", 2070 | "\n", 2071 | " \n", 2072 | " \n", 2073 | " \n", 2074 | " \n", 2075 | " \n", 2076 | " \n", 2077 | " \n", 2078 | " \n", 2079 | " \n", 2080 | " \n", 2081 | " \n", 2082 | " \n", 2083 | " \n", 2084 | " \n", 2085 | " \n", 2086 | " \n", 2087 | " \n", 2088 | " \n", 2089 | " \n", 2090 | " \n", 2091 | " \n", 2092 | " \n", 2093 | " \n", 2094 | " \n", 2095 | "
0
011
112
25
32
\n", 2096 | "
" 2097 | ], 2098 | "text/plain": [ 2099 | " 0\n", 2100 | "0 11\n", 2101 | "1 12\n", 2102 | "2 5\n", 2103 | "3 2" 2104 | ] 2105 | }, 2106 | "execution_count": 34, 2107 | "metadata": {}, 2108 | "output_type": "execute_result" 2109 | } 2110 | ], 2111 | "source": [ 2112 | "# 1D numpy array\n", 2113 | "array_1d = np.array(list_1d)\n", 2114 | "todf(array_1d)" 2115 | ] 2116 | }, 2117 | { 2118 | "cell_type": "code", 2119 | "execution_count": 35, 2120 | "metadata": {}, 2121 | "outputs": [ 2122 | { 2123 | "data": { 2124 | "text/html": [ 2125 | "
\n", 2126 | "\n", 2139 | "\n", 2140 | " \n", 2141 | " \n", 2142 | " \n", 2143 | " \n", 2144 | " \n", 2145 | " \n", 2146 | " \n", 2147 | " \n", 2148 | " \n", 2149 | " \n", 2150 | " \n", 2151 | " \n", 2152 | " \n", 2153 | " \n", 2154 | " \n", 2155 | " \n", 2156 | " \n", 2157 | " \n", 2158 | " \n", 2159 | " \n", 2160 | " \n", 2161 | " \n", 2162 | " \n", 2163 | " \n", 2164 | " \n", 2165 | " \n", 2166 | " \n", 2167 | " \n", 2168 | " \n", 2169 | " \n", 2170 | " \n", 2171 | " \n", 2172 | " \n", 2173 | " \n", 2174 | " \n", 2175 | " \n", 2176 | " \n", 2177 | " \n", 2178 | " \n", 2179 | "
0123
0111252
11524610
2108125
3121586
\n", 2180 | "
" 2181 | ], 2182 | "text/plain": [ 2183 | " 0 1 2 3\n", 2184 | "0 11 12 5 2\n", 2185 | "1 15 24 6 10\n", 2186 | "2 10 8 12 5\n", 2187 | "3 12 15 8 6" 2188 | ] 2189 | }, 2190 | "execution_count": 35, 2191 | "metadata": {}, 2192 | "output_type": "execute_result" 2193 | } 2194 | ], 2195 | "source": [ 2196 | "# 2D numpy array\n", 2197 | "array_2d = np.array(list_2d)\n", 2198 | "todf(array_2d)" 2199 | ] 2200 | }, 2201 | { 2202 | "cell_type": "code", 2203 | "execution_count": 36, 2204 | "metadata": {}, 2205 | "outputs": [ 2206 | { 2207 | "data": { 2208 | "text/html": [ 2209 | "
\n", 2210 | "\n", 2223 | "\n", 2224 | " \n", 2225 | " \n", 2226 | " \n", 2227 | " \n", 2228 | " \n", 2229 | " \n", 2230 | " \n", 2231 | " \n", 2232 | " \n", 2233 | " \n", 2234 | " \n", 2235 | " \n", 2236 | " \n", 2237 | " \n", 2238 | " \n", 2239 | " \n", 2240 | " \n", 2241 | " \n", 2242 | " \n", 2243 | " \n", 2244 | " \n", 2245 | " \n", 2246 | " \n", 2247 | " \n", 2248 | " \n", 2249 | " \n", 2250 | " \n", 2251 | " \n", 2252 | " \n", 2253 | "
v1v2
01115
11224
256
3210
\n", 2254 | "
" 2255 | ], 2256 | "text/plain": [ 2257 | " v1 v2\n", 2258 | "0 11 15\n", 2259 | "1 12 24\n", 2260 | "2 5 6\n", 2261 | "3 2 10" 2262 | ] 2263 | }, 2264 | "execution_count": 36, 2265 | "metadata": {}, 2266 | "output_type": "execute_result" 2267 | } 2268 | ], 2269 | "source": [ 2270 | "# Data frame (in=out)\n", 2271 | "pd_df=pd.DataFrame({'v1':[11, 12, 5, 2], 'v2':[15,24, 6,10]}) #\n", 2272 | "todf(pd_df)" 2273 | ] 2274 | }, 2275 | { 2276 | "cell_type": "code", 2277 | "execution_count": 37, 2278 | "metadata": {}, 2279 | "outputs": [ 2280 | { 2281 | "data": { 2282 | "text/html": [ 2283 | "
\n", 2284 | "\n", 2297 | "\n", 2298 | " \n", 2299 | " \n", 2300 | " \n", 2301 | " \n", 2302 | " \n", 2303 | " \n", 2304 | " \n", 2305 | " \n", 2306 | " \n", 2307 | " \n", 2308 | " \n", 2309 | " \n", 2310 | " \n", 2311 | " \n", 2312 | " \n", 2313 | " \n", 2314 | " \n", 2315 | " \n", 2316 | " \n", 2317 | " \n", 2318 | " \n", 2319 | " \n", 2320 | " \n", 2321 | " \n", 2322 | "
v1
011
112
25
32
\n", 2323 | "
" 2324 | ], 2325 | "text/plain": [ 2326 | " v1\n", 2327 | "0 11\n", 2328 | "1 12\n", 2329 | "2 5\n", 2330 | "3 2" 2331 | ] 2332 | }, 2333 | "execution_count": 37, 2334 | "metadata": {}, 2335 | "output_type": "execute_result" 2336 | } 2337 | ], 2338 | "source": [ 2339 | "# Pandas series\n", 2340 | "todf(pd_df['v1'])" 2341 | ] 2342 | }, 2343 | { 2344 | "cell_type": "markdown", 2345 | "metadata": {}, 2346 | "source": [ 2347 | "Raise an error in +2D objects:" 2348 | ] 2349 | }, 2350 | { 2351 | "cell_type": "code", 2352 | "execution_count": 41, 2353 | "metadata": {}, 2354 | "outputs": [ 2355 | { 2356 | "name": "stdout", 2357 | "output_type": "stream", 2358 | "text": [ 2359 | "(1, 4, 4)\n" 2360 | ] 2361 | } 2362 | ], 2363 | "source": [ 2364 | "list_3d = np.array([[[11, 12, 5, 2], [15,24, 6,10], [10, 8, 12, 5], [12,15,8,6]]]) # error\n", 2365 | "print(list_3d.shape)\n", 2366 | "# todf(list_3d) # <- error" 2367 | ] 2368 | }, 2369 | { 2370 | "cell_type": "markdown", 2371 | "metadata": {}, 2372 | "source": [ 2373 | "It raises the error: `Exception: I live in flattland! (can't handle objects with more than 2 dimensions)`" 2374 | ] 2375 | }, 2376 | { 2377 | "cell_type": "code", 2378 | "execution_count": null, 2379 | "metadata": {}, 2380 | "outputs": [], 2381 | "source": [ 2382 | "\n", 2383 | "\n", 2384 | "\n", 2385 | "\n", 2386 | "\n", 2387 | "\n", 2388 | "\n", 2389 | "\n", 2390 | "\n", 2391 | "\n", 2392 | "\n" 2393 | ] 2394 | }, 2395 | { 2396 | "cell_type": "markdown", 2397 | "metadata": {}, 2398 | "source": [ 2399 | "## 3) Model validation: Clustering" 2400 | ] 2401 | }, 2402 | { 2403 | "cell_type": "code", 2404 | "execution_count": 43, 2405 | "metadata": {}, 2406 | "outputs": [], 2407 | "source": [ 2408 | "from funpymodeling.model_validation import coord_plot" 2409 | ] 2410 | }, 2411 | { 2412 | "cell_type": "code", 2413 | "execution_count": 44, 2414 | "metadata": {}, 2415 | "outputs": [], 2416 | "source": [ 2417 | "from sklearn.cluster import KMeans\n", 2418 | "\n", 2419 | "x = iris.drop('species', axis=1)\n", 2420 | "\n", 2421 | "mod_km=KMeans(n_clusters=3)\n", 2422 | "iris['cluster']=mod_km.fit_predict(x)\n" 2423 | ] 2424 | }, 2425 | { 2426 | "cell_type": "code", 2427 | "execution_count": 45, 2428 | "metadata": {}, 2429 | "outputs": [ 2430 | { 2431 | "data": { 2432 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXkAAAEuCAYAAABriGJyAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy86wFpkAAAACXBIWXMAAAsTAAALEwEAmpwYAABWiUlEQVR4nO3dd1gU19vG8e+hVwV7AXvFhgjYa+z600RNj0nsijW29F5M7CV2TVHfRKPRFHuvUUGwi1hRsYuIdBb2vH8ARo3KArs7u3A+1+Wluzs7504Gnp09M/OMkFKiKIqi5E82WgdQFEVRTEcVeUVRlHxMFXlFUZR8TBV5RVGUfEwVeUVRlHxMFXlFUZR8zE6rgYUQskGDBloNr+RRaGgoavtZJ7XtrFtoaOgdKWVxQ5cXWp0nL4SQ6hx96yWEQG0/66S2nXUTQoRKKf0NXV5N1yiKouRjqsgriqLkY6rIK4qi5GOaHXhVFEXRik6nIyoqiuTkZK2jPJWTkxNeXl7Y29vnaT3ZFnkhxA9AV+CWlLL2E14XwAygM5AIvC2lDMtTKkVRFBOKiorC3d2dChUqkFHCLIuUkujoaKKioqhYsWKe1mXIdM1PQMdnvN4JqJr5ZyAwN0+JFEVRTCw5OZmiRYtaZIGHjDOgihYtapRvGtnuyUspdwshKjxjke7AkszzIQ8IITyEEKWllNfznE6xSMlpOq0jKLmUderk7du3NU6irfT0dNLS0rSOka309PQ8bytjzMmXBa489Dgq87lsi/y1a9coU6aMESIo5nIh9g5d136vdQwlF3Q6HYsXLwbgo48+0jiNtnr37s21a9c0zbBz506++OIL0tPTefnllwkKCvrPMvfu3WPWrFl5GsesB16FEAPJmNIBYMKECbRq1YqiRYuaM4aSB9NuhnE/NeMr5J/bNlHY1lHjRIohdDodu3fv5tatWwA0atRI40TacnR0xNXVVbPx09PT+eyzz1i5ciVlypShffv2dOvWjerVqz+ynKOj43+21YIFC3I0ljGK/FXA+6HHXpnP/YeUcgGwADKueC1UqBC7du1iyJAh1KxZ0whRFFPad/08oRc38kpVf6YA/zgl8F3TDlrHUrIRFxfHzJkzuXPnDn379mXBggX06dNH61iaCg8Pp1ixYpqNv3//fqpVq4a/f8aFq6+//jq7d++madOmjyx3+/bt/2yrvn375mgsYxT5v4BhQojlQEMg1tD5+PHjxzNjxgxmzZpFv379VD8NC5au1/NF8FrKunrwZaPuTAF+PRvC2zWbULNIKa3jKU9x9+5dpk+fzt27dwkKCqJOnTpaR7I4nx78m5N3jTt1U6tIGT5v+L+nvn716lW8vf/dN/by8uLgwYNGzZAl27NrhBC/AvuB6kKIKCFEPyHEYCHE4MxF1gMXgHPAQuC/E0tPUbhwYcaMGUOFChVYuHAhe/bsycV/gmIOq86HcfLudT7w74SzXcZ5u+72TnwZsk71QbFQN27cYOLEidy/f5+RI0eqAl9AGXJ2zavZvC6BobkN4OrqyqhRo5g/fz7Lli0jISGBDh06WOypTQVRgi6F70I34Ve8HN0q1n3w/Du+z/FZ8Fq2R0XwnHcNDRMqj4uMjGTmzJnY2toyZsyYR/YalUc9a4/bVMqWLcuVK/+erxIVFUXZsmVNMpZFtDVwcHAgKCiIwMBA1qxZw++//672Di3InOO7uJUUx6eBXR/58H2rZmMqFSrGlyHr0OnTNUyoPOz06dNMnToVJycnxo0bpwq8BQoICODs2bNcvHiR1NRUli9fTrdu3UwylkUUeQBbW1v69OlDq1at2LJlC0uWLCE9XRUOrV2Lv8f8E3voXrEeDUqUe+Q1extbPgrozLnY2yw7bZr5RCVnwsLCmDVrFkWLFmX8+PGUKFFC60jKE9jZ2fH999/ToUMHatasyUsvvUStWrVMM5ZJ1ppLNjY2vPLKK7i5ubF27VoSEhIYMGBAnns3KLn3bdgmJJL3/Z980XM775o0LV2ZqUe28kJlXzwcXcycUMmyd+9eli1bRsWKFRk2bJimpwgq2evcuTOdO3c2+TgWsyefRQjB//73P15++WWOHj3KzJkzSUpK0jpWgXT49hVWnz/MgFrN8HLzfOIyQgg+CejCvZQkZh7dbuaESpZNmzaxdOlSfHx8GDVqlCrwygMWV+SztGnThr59+3Lu3DmmTp1KXFyc1pEKFCklXwSvpZiTG8Pqtn7msrWKluHlqg34MXw/F+/fMVNCBTK20++//87q1asJCAggKCgIR0d1gZryL4st8gANGzYkKCiI69evM2nSJO7evat1pAJjXeRxQm5dYpxfe9zssy8a4/zaY29jy9chG8yQToGMqyaXLl3K5s2badmyJX379sXOzqJmYBULYNFFHqBOnTqMGjWK+/fvM3HiRK5fV33PTC05Tcc3hzZSw7MUr1Q17FaSJV0KMaxuKzZePsn+GxdMnFDR6XQsXLiQffv20aVLF1599VVsbCz+11nRgFX8VFSpUoWxY8eSnp7OpEmTuHjxotaR8rUfwv/hcvxdPg3sgm0OCseAWs0p41qYL4LXoZd6EyYs2JKTk5k1axaHDx/mpZdeolu3buq6EuWprKLIQ8Zlv+PHj8fZ2Zlp06YRHh6udaR86U5SPLOObqetdw2al6mao/c629nzfoNOHI++yu/nDpsoYcEWHx/P1KlTOXv2LH369OG5557TOpJi4aymyAMUL16c8ePHU6xYMb7//nvCwtQNqIxtyuEtJKXp+DigS67e371SXXyLefNt2CYSdalGTlew3b17l0mTJnHt2jWGDBlS4DtJWru+fftSokQJatf+zw33jMqqijz82++mfPnyLFiwgL1792odKd+IiLnJ/50JpneNRlQuXDxX67ARNnwW2JWbifeZe2KXkRMWXFl9aO7du8eIESOoW7du9m9SLNrbb7/Nxo0bTT6O1RV5yOh3M3LkSHx8fFi6dKlZ/kcVBF+ErMPd3pHRvnmbAvAvWZ5uFesy9/huriXEGildwRUZGcmkSZNIS0tj7NixVKtWTetIihG0aNGCIkWKmHwcqz3fytHRkaCgIH766SfWrFlDQkICPXr0UAegcmlHVAS7rp7h08AueDrl/UKa9xt0ZNPlU0wM3cT0Fi8ZIWHBFBERwezZs3Fzc2PkyJGULFlS60j5zorQz4m6d8qo6/Ty8OHlBp8adZ25ZZV78lns7Ozo27cvrVq1YvPmzSxdulT1u8mFNH06X4aso4J7Ud6q0dgo6/R2L0J/n2asOh/G0TtRRllnQXPkyBFmzpz5oA+NKvBKbljtnnyWJ/W76d+/v+p3kwP/FxHMmXu3WNSmNw62xvuRGFa3FSvOHuKL4LWs6jRIfcvKgX379rF06VIqVKjA8OHDVZsCE7KUPW5Tseo9+SwP97s5cuQIs2bNUv1uDBSbksSUw1tpXKoSHcr5GHXd7g5OjPVrx8Gbkay/dMKo687PNm/ezJIlS6hZsybvvPOOKvBKnuSLIp8lq9/N2bNnVb8bA808toOYlEQ+Dexikj3tV6r6U92jJN8c2kBKeprR15+fSClZvXo1v//+Ow0aNGDo0KGqD00+9uqrr9K4cWMiIiLw8vJi8eLFJhknXxV5UP1uciLyfjQ/ntrHS1X9qF3UNHelsbOx5dPArlyKu8uPp/4xyRj5gV6vZ9myZWzatIkWLVrQv39/1Ycmn/v111+5fv06Op2OqKgo+vXrZ5Jx8l2Rh//2u7lx44bWkSzSN4c2YGdjyzi/DiYdp0XZqrTxqs6Mo9uITo436VjWKKsPzd69e+ncuTOvvfaa6kOjGE2+/UmqUqUKY8aMIT09nYkTJxIZGal1JIty8MZF1l86QVCdlpRyKWTy8T4O6EJimo6ph7eafCxrkpyc/ODq7RdffJHu3burA9SKUeXbIg/g7e3NuHHjcHJyYurUqarfTSa91PN58FpKuxRmUO3mZhmzqkcJ3qjekGURwZy5d9MsY1q6+Ph4pk2bxpkzZ3j77bdp27at1pGUfChfF3mAEiVKMH78eIoWLar63WRaff4Ix6Kv8p5/R5ztHMw27pj6bXG1d+DL4PVmG9NSxcTEMHnyZKKiohg8eDCNGxvn+gRFeVy+L/IAHh4ejB07lnLlyhX4fjeJulS+Dd1IvWJevFCpnlnHLuLkysh6bdhxNYKdV8+YdWxLktWHJiYmhpEjR1Kvnnm3g1KwFIgiDxn9bkaNGvWg382mTZu0jqSJ+Sd3cyPxPp8GdsVGmH/zv12zCeXdi/Jl8DrS9AXv6uTLly8zefJkdDodY8aMUX1oFJMrMEUe/u13ExAQ8OB8ZCml1rHM5npCLHOO76JLhToElqygSQZHWzs+9O9ExL2b/HomRJMMWomIiGDKlCk4ODgwbtw4ypUrp3UkRSNXrlyhdevW+Pj4UKtWLWbMmGGysQrcibhZ/W5cXFzYvHkzCQkJvP7669ja2modzeQmhW0mXa/nA/+OmuboVL4WDUtWZPLhLXSv5EshBydN85jDkSNHWLhwIcWLF2fkyJF4enpqHUnRkJ2dHVOmTMHPz4+4uDgaNGhAu3bt8PEx7lXnUMD25LPY2Njw6quv0qVLF/bt28fChQvR6XRaxzKp43eusvJcGP18mlHevaimWYQQfBrYhbvJicw6ukPTLObwzz//MG/ePLy9vRk7dqwq8AqlS5fGz88PAHd3d2rWrMnVq1dNMlaB25PPIoSgW7duuLm5sWLFCmbNmkVQUBBOTvlvr1JKyechayni5MLweq21jgNA3WJe9KpSn8Wn9tK7RkPKuZu+r7YWtmzZwqpVq6hZsyaDBw/Olz9f1m7FihVERRm3U6qXlxcvv/yyQctGRkZy+PBhGjZsaNQMWQrknvzD2rRpQ58+fR70u4mPz39XZG68fJIDNy4ytn47i5oaGe/XAVsbG745tEHrKEYnpWTNmjWsWrXqQR8aVeCVx8XHx9OzZ0+mT59OoUKmuSixwO7JP6xRo0a4uLiwYMECJk6cyKhRo8xyxxZzSElP46uQDVT3KMmr1QK0jvOI0q6FGVK7JVOPbCXkZiQBGh0MNja9Xs8vv/zCnj17aNGiBa+++qpqU2DBDN3jNjadTkfPnj15/fXX6dGjh8nGUT95merWrcvIkSOJjY3NV/1ufg7fz6W4aD4O7IKdjeUdXB5cuwWlXArxWfBa9FKvdZw8y+pDs2fPHjp16qT60ChPJKWkX79+1KxZk9GjR5t0LIN++oQQHYUQEUKIc0KI957wejkhxA4hxGEhxDEhRGfjRzW9qlWrMnbsWNLT05k0aZLV97u5m5zA9KPbaFW2Gq3KWub52C72DrzXoANH70Txx4WjWsfJk+TkZGbPnk1YWBi9evXi+eefV31olCfKuinM9u3b8fX1xdfXl/XrTXMluMjuPHEhhC1wBmgHRAEhwKtSylMPLbMAOCylnCuE8AHWSykrZLNeaannqN+6dYvp06cTHx/P0KFDqV69utaRcuWjA3+y9PRBtjw/kmoexr11nBDCaNcY6KWern/P5nZSPLt7jjFrqwVjiY+PZ9asWVy+fJk333zTotsUGHPbWavw8HBq1qypdYxsPSmnECJUSulv6DoM2ZMPBM5JKS9IKVOB5UD3x5aRQNZRg8LANUMDWKKH+93MnDmTw4cPax0px87eu8XS0wd5vXqg0Qu8sdkIGz4N7Mr1xFgWnNijdZwcU31oFEtmyIHXssCVhx5HAY+f6/MZsFkIMRxwBZ7YTk8IMRAYmPV4586dOYhqfo0aNWLXrl3Mnz+fgIAAKleurHUkg02+EYoDNjRKdDHZ/2djrzfApSQzj2yn3B0dnnbWcSbK/fv32bFjB6mpqbRs2ZKYmBiL/7kGy//dM7XChQtbxZ3jkpOT87ytjHV2zavAT1LKKUKIxsBSIURtKR89kialXAAsgIzpmlatWhlpeNNp1aoV8+bNIzg4GG9vb9q3b691pGztvnqWIxc38qF/J7rVaWmycYy9/Srcr0ObNVPZ6xTPlGbaXpVriMuXLzNz5kxsbW159913rapNgTX87plSeHg47u7uWsfIlpOTE/Xr18/TOgyZrrkKeD/02CvzuYf1A34DkFLuB5yAYnlKZiEcHR0ZOnQo/v7+/P7776xevdqi5zPT9Rm94su5FaGvT1Ot4+RIhUJF6ePTlN/OhnIi2jRX/xnLmTNnmDJlCvb29qoPjZWy5N9jMF4+Q4p8CFBVCFFRCOEAvAL89dgyl4HnAIQQNcko8reNktAC2NnZ0a9fP1q0aMGmTZtYtmwZer1lnu7369kQIu7d5IOATjjaWt9lECPqtsbT0YXPg9dZ7C/h0aNHmTFjBp6enowfP56SJS37mIfyX05OTkRHR1vsz5iUkujoaKNcQJdtFZBSpgkhhgGbAFvgBynlSSHEF8AhKeVfwBhgoRDiHTIOwr5tsafO5JKNjQ2vvfYabm5urF+/noSEBPr164e9vb3W0R6IS01mctgWAktWoEv52lrHyZXCjs6Mqd+WDw/8yebLp+hQvpbWkR6xf/9+lixZQrly5Rg+fDhubm5aR1JywcvLi6ioKG7fttx9UScnJ7y8vPK8nmxPoTQVSz6FMjtbt25l5cqV1KhRgyFDhljM5eoTDm1k9vGdrO06FN/i3tkunxemPA0vTZ9Ouz9moNOns/2Fd3CwkG8k+aUPjTqF0rqZ4hRK5TFt27alT58+nDlzhmnTpllEv5srcXdZdGovPSvXN3mBNzU7G1s+DuxCZFw0S04f0DoOUkr++OMPVq1ahZ+fn+pDo1gVVeRzqVGjRgwePJirV68yadIk7t69q2meCaEbEQjebWD5Z6UYonXZarQsU5VpR7YSk5ygWY6sPjQbNmygefPmDBgwwKKm6BQlO6rI50G9evUYMWIE9+7d07TfTeitS/x18RiD67SgjGthTTIYmxCCjwO7EKdLYdqRbZpkSEtLY9GiRezevZuOHTvy+uuvqz40itVRP7F5VK1aNcaMGUNaWhqTJk3i0qVLZh1fL/V8FryWks7uBNU23TnxWqjhWYrXqgWy5PQBzsea9wBZVh+a0NBQevXqxQsvvKD60ChWSRV5IyhXrhzjxo3D0dGRqVOnEhERYbax/7xwjMO3r/Bugw642Ftfz5fsjK3fDic7e74KMU3zpieJj49n+vTphIeH8+abb9KuXTuzja0oxqaKvJGULFmScePGUaRIEWbOnMmRI0dMPmZSmo4JoRuoU7Qsvar4mXw8LRRzdmN43dZsuRLO3mvnTD5eVh+aK1euMHjwYJo2ta4LyhTlcarIG5Gnpydjx47F29ubefPmsW/fPpOOt/DkHq4lxPJJYBdsRP7dlP18muLt5snnwWtJN+FFaDdv3mTSpEnExMQwYsQIfH19TTaWophL/q0MGnF1dWXUqFHUqFGDJUuWsGXLFpOMcysxju+P7aRjuVo0LlXJJGNYCic7ez7070R4zA1WnDtkkjEuX77MpEmTSElJYfTo0VbbXlpRHqeKvAk4OTkxdOhQGjRowKpVq1izZo3RLz6ZGLYJnT6dDwM6GXW9lqpLhToElCjPpLDNxOtSjLrux/vQlC9f3qjrVxQtqSJvIvb29vTv358WLVqwceNGo/a7OXX3GivOhtKnZmMqFsoXfeCyJYTgk8Cu3E6KZ/axnUZb79GjR5k5cyYeHh6MGzeOUqVKGW3dimIJLON68Xwqq9+Nq6srGzZsIDExkb59++bpYhopJZ8Hr8PD0ZkR9doYMa3lq1/cmxcq+bLg5B5erx6Il5tnntan+tAoBYHakzcxIQTPP/88L774ImFhYcyePZvk5ORcr2/rlXD2XT/PaN+2eDi6GDGpdXg/84reCaEb87SerVu38tNPP1GtWjXeeecdVeCVfEsVeTNp27Ytb7/9NhEREbnud5OansaXIeupUrg4b9R4/OZcBUMZNw8G127BnxeOEnrrco7fL6Xkzz//ZOXKlfj5+TFs2DDVh0bJ11SRN6PGjRszePBgoqKimDx5MjExMTl6/9KIg1y4f4ePA7pgb2NropSWL6hOS0o4u/NF8NocHdDO6kOzfv16mjVrpvrQKAWCKvJmVq9ePUaOHElMTAwTJ07k5s2bBr0vJiWRqYe30rxMFdp4FezT+1ztHRnv157Q25f5++Ixg96TlpbG4sWLH/SheeONN1QfGqVAUD/lGsjqd6PT6Zg0aRKXL2c/7TDjyDbidMl8EtBV9VABXqzSgFpFSvNN6AaS03TPXDYlJYXZs2dz6NAhevbsqfrQKAWKummIhm7evMn06dNJTEwkKCjoqRfgXIi9TZs103i5qj/fNe1h5pRPZgk3nth3/Twvb1zIew06MKxu6ycuk5CQwKxZs4iMjKR3794Fvk3BoctrCSj/PyZs6o6zfSGcHdxxsS+Es707zg4Zf7s4FMp47cG/M15ztHPN11dWW4uc3jREnUKpoZIlSzJ+/HhmzJjBzJkzGTBgwBMvpf/60Aac7OwZ66caZT2saenKdCjnw6yjO3i5qj/Fnd0feT0mJoaZM2dy69YtBg0alOe73lu787cP8eP+dwBwsncjURdLdEIUSbr7JOnuo0t/9kVmAvHIh8HDHxJOD38g2Bd65MPB2T7zg8TBHXtbdZDb3NSevAWIj4/n+++/59KlS7z55ps0btz4wWv/7q12ZFjdVtqFfIwl7MnDv99yXqrqz8SHvuXcvHmTGTNmEB8fz9ChQwt8m4K7CVf5ZlM3nO3d+Krb7iduO116Ckm6OJJS75OkiyMx9X7mB8BDz+nuk5Qa9+D5R5bRxSHlsy/4s7NxzCz+T/sGkfXh8ZTn7NywKcAnHUDO9+RVkbcQycnJzJs3j/DwcHr16kW7du1I1+vp/PcsYlOT2PnCGJzsLOdMEEsp8gCfHfybH8L/YWO3EfgUKc2VK1eYMWMGUkqGDx9OhQoVtI6oqZS0RCZt6cnthCu8124NZTyqmWTbSSlJSUvILPyZHwSPfTgk6u6T/NCHQ2JqxuOsD4zU9KRsx3Gyc8PZwf2JU0rO9oUefGt4/NtG1jcMe1snqz4mo4q8FdPpdPz444+EhobSsWNHkn3KMu6f1cxp+SrdKtXTOt4jLKnIx6Qk0mzVJOoWK8vnFVswe/ZsnJ2dGTVqVIFvU6CXehbuG8rhKxsY2vIH6pRpY1Hb7nHpel3mN4esD4b7GX9nPvefbxCp/36LyPpw0cu0Z45hI+ye8OGQNaX036moR45TZL5ua6PdTLcq8lZOr9fz66+/snv3bm6VcUPvV4k/ugZZ3J6HpRWKxaf2MXvj7zQ6n0zxYsUZOXIkRYoU0TqW5v4+Pp21J6bR0/cD2tccBFjetjMmKSWp6UmZHxKxj0w1PfxBkJQa+8Tpp6TUOJLTsr9Q0cHW+QkfEoVxdnDPOD7xxIPa/x6rcLRzzfXvtDrwauWy+t0cj7sJhyOo5KkjLS1NXbSTjRqxAv8zCcS7O/L1mNF4Fsof97rNi7Ar61l7YhqNKvSkXY2BWscxCyEEjnYuONq54EHJXK1Dr08nKS3+3w+C1PuPfVjcf2xKKo74lBhux19+sEyaPjWbnDaZ3xge/gZR6BnHKwo/eJxTqshboGsJsax0uUv7+pW5cDic2bNnM3jwYHX5/VNs27aN3377jRIVvFhaPJ4/ok7Sx6eJ1rE0dSXmJD/uH03FovV5I/Abi/smaMlsbGxxdSiMq0PudxR06cn/+XB4+FvDkw5gRydcefCeZF0cEuN821LTNRZo+K7lrL90gt09xnLp2CmWLl1K+fLlGTZsmMU00rKEr/xSSv7++2/WrVtH/fr16du3L29uX8KJu9fY23NsgWzgBnA/6TbfbO4GSN5v/xeFnUs88rolbDvl2fRST4ou/pFvElnHKBpX6pmj6Rp1ZYOFOXz7CmsuHGFgreaUdfOgSZMmDBo0iCtXruSq301+lXXsYt26dTRt2pSBAwfi4ODAJ4GdiU1JYubR7VpH1IQuPYV5ewcRn3KXoOYL/1PgFetgI2xwdihEEdeyeHnWpGqJQOqVbUujijm/GFIVeQuS0St+LcWd3Rj60Dnxvr6+jBgxgpiYGCZNmmRwv5v8Ki0tjR9++IFdu3bRvn17evfu/aAPjU+RMrxSzZ8fw/dzIfaOxknNS0rJLyEfcv5OKG83mkK5InW0jqRYAFXkLcjayOMcunWJcX7tcbN3fOS16tWrM2bMGFJTUw3ud5MfpaSkMGfOHEJCQujRowc9e/b8z3zzuPrtcbCx5ZtDGzRKqY1tEYv55+JKutQagX+5rlrHUSyEKvIWIjlNxzeHNlDTsxQvV3nydFu5cuUYN24c9vb2TJkyhTNnzpg5pbYSEhKYPn06p06donfv3nTo0OGJy5VwcWdY3dZsvHySf66fN3NKbZy8votVR77G16sDXeu8o3UcxYKoIm8hFp/ax5X4GD4N7IrtM1rgZvW78fT0ZMaMGRw9etSMKbVz7949Jk+ezOXLlxk0aBDNmjV75vL9azWjrKsHXwSvQ5/NpfbW7sb98yzcN4yyhavTp9E01URMeYRBPw1CiI5CiAghxDkhxHtPWeYlIcQpIcRJIcQvxo2Zv91JimfWsR20865JszJVsl3e09OTsWPH4uXlxbx589i/f78ZUmrn1q1bTJo0iejoaIYPH25QozFnO3ve9+/IibvXWHUuzAwptZGQGsvs3f2ws7EnqMUinOxdtY6kWJhsi7wQwhaYDXQCfIBXhRA+jy1TFXgfaCqlrAWMMn7U/Gvy4S0kp+n4KKCzwe9xc3PjnXfeoXr16vz0009s3brVhAm1c+XKFSZNmkRSUhKjR4+mRo0aBr+3e8V61C/uzXehm0jUPfviFGuUrk9j4b6hRCdEMbj5fIq6emkdSbFAhuzJBwLnpJQXpJSpwHKg+2PLDABmSyljAKSUt4wbM/86HXODX84E82aNRlQuXDxH73VycmLo0KH4+fmxcuVK/vjjj3x1/vPZs2eZMmUKtra2jB8/PseNxoQQfBbYlZtJccw5scs0ITW06vDXhN/Yw2v+X1GleIDWcRQLZUiRLwtceehxVOZzD6sGVBNC7BNCHBBCdDRWwPxMSsmXwetwt3finfptc7UOe3t7BgwYQLNmzdiwYQO//PILer31z0EfP36cGTNmUKhQIcaPH5/rRmMNSpSnW8W6zDu+m2sJsUZOqZ2955ez/cwPtKnWl2aVX9E6jmLBjNXWwA6oCrQCvIDdQog6Usp7Dy8khBgIPGiisXPnTiMNb52OJt5m182zvFGkBkf3B+dpXV5eXtSsWZPdu3cTGRlJo0aNsLU1bd9tU22/yMhIDhw4gKenJ02aNOHYMcPu4/o0rXUebNCnM3r9zwwuXtdIKbVzO/UM26O/pZRDLYrfb5ar7VDQf/cKkmzbGgghGgOfSSk7ZD5+H0BKOeGhZeYBB6WUP2Y+3ga8J6UMecZ6C3RbA50+nfZ/zCBN6tn2/CgcbI3zebtlyxZWrVqFj48PgwYNMlm/G1NdGr99+3ZWrFhB9erVCQoKMlr+CYc2Mvv4TtZ2HYpvcW+jrFML0QlRTNjUDReHwrzb/o9c9VdRbQ2sW067UBoyXRMCVBVCVBRCOACvAH89tswfZOzFI4QoRsb0zQVDQxRE/xcRzNnYW3zk38loBR6gXbt2vPnmm4SHhzN9+nQSEhKMtm5TklLy119/sWLFCnx9fRk+fLhRP6CG1W1FMSc3vghZZ7UFLlmXwJzd/UnT6whqsShPDbSUgiPbIi+lTAOGAZuAcOA3KeVJIcQXQohumYttAqKFEKeAHcA4KWW0qUJbu9iUJKYc3kKTUpVoX84n+zfkUNOmTRk8eLDV9LvR6/UsX778kT40xm6t7O7gxFi/dgTfjGTdpRNGXbc56KWeHw+8w9XYCAY0/Z5ShSprHUmxEqoLpQa+DF7HgpN72dhtOLWKljHZOBEREcyZMwdXV1dGjhxJyZK566/9JMb6yp+WlsZPP/1ESEgI7du3p0ePHiZri5uu19Phr5kk6lLZ0WM0jkb8BmVqfx2bwrqTM3mx/se0rdE/T+tS0zXWzRTTNYoRRd6P5ofwf3ipagOTFnjI6HczevRoUlJSLLLfTWpqKnPnzn1mHxpjsrWx4ZOALlyOv8sPp/aZbBxjO3R5LetOzqRJxRd5rno/reMoVkYVeTP7+tB67G1sGe/X3izjlS9f3iL73WT1oTl58uQz+9AYW4uyVXnOqwYzj24nOjn727xp7fLd4/x0YAyVizXgtYCv1c0/lBxTRd6MDty4wIZLJxlapyUlXXJ+G6/cKlWqFOPGjcPDw4OZM2dq3u8mNjaWKVOmcOnSJQYOHJhtHxpj+yigM4lpOqYctuyrhGOTbjFnzwDcHIswuNl87G0ds3+TojxGFXkz0Us9XwSvo7RLYQbWbm728YsUKcK4ceMoW7aspv1ubt++zcSJE7lz5w7Dhg3Dz8/P7BmqepSgd42GLIs4SESMZfbm16UnM3fPQBJS7jG0xSIKOefsamhFyaKKvJmsPn+YY9FXed+/I852DppkyOp3U61aNU363URFRTFx4kSSkpJ45513qFmzplnHf9gY37a42zvyZcg6zTI8jZSSZcEfcDH6MH0aT8Xbs5bWkRQrpoq8GSTqUpkQuol6xbx4vlI9TbM4OTk92INeuXIlf/75p1nOtDh37hyTJ0/G1taWcePGUbFiRZOP+SyeTq6MrPccO6+eYUdUhKZZHrfl9AIORP5O19rv4OdteNM6RXkSVeTNYN6J3dxMvM9ngV0totf3w/1u1q9fz6+//mrSfjfHjx9n+vTpD/rQlC5d2mRj5cTbNRtTwb0oX4asI02frnUcAI5f287qIxPw8+5Ml9ojtI6j5APaV5x87npCLHNP7KJrhToElKygdZwHbGxseOONN+jQoQO7du1i8eLFpKWlGX2c4OBg5syZQ+nSpRk3bhxFihQx+hi55WBrx4cBnThz7xa/nnlqBw6zuR57lkX/jMDLoyZvN5piETsEivWznqtBrNTEsE2k6/V84N9J6yj/IYSgR48euLq6snr1ahITExk8eDCOjsY5i2PHjh0sX76catWqERQUhLOzs1HWa0wdy9WiUamKTD68he6VfCnkYJpeP9lJSLnH7N39sLdxJKjFIhztXDTJoeQ/alfBhI7diWLluTD612pGOXfL2YN9XIcOHR70u5k2bVqe+91IKfn7779Zvnw5vr6+jBgxwiILPGR80H0a0JW7yYnMOrpDkwzpeh0L9gURk3idIc3nU8T18U7eipJ7qsibiJSSz4PXUdTJleF1W2sdJ1tNmzZl0KBBD/rd3Lt3L1fryepDs3btWpo0aWKSPjTGVqdYWV6s4sfiU3u5FGf+lku/hX3J6Zv7eD3gGyoXN/hqdUUxiCryJrLh0kkO3rzIuPrtcddoCiCn6tevz4gRI4iOjmbixIncupWzG3ylp6fz448/snPnzgfdME3d095YxjfogK2NDRMObTTruLvP/cLOsz/Ttnp/mlR60axjKwWDKvImkJKexteH1lPdoySvVLOuPbPH+91cuXIl+zeR0Ydmzpw5BAcH88ILL5i8D42xlXIpRFCdlqyNPE7wzUizjHnm1gF+PfQxtUq3pKfvB2YZUyl4VJE3gZ/C/+FS3F0+CeyCnY117Mk+rEKFCowbNw5bW1smT57M2bNnn7l8YmLigz40b7zxBh07drSqAp9lUK0WlHIpxOfBa9FL095C8U78ZebtGUxxt/L0bzILGyv8OVGsgyryRhadHM+Mo9tpXbY6LctW0zpOrpUqVYrx48fj4eHBjBkznnoLvof70AwYMIDmzc3fssFYXOwdeK9BR47eiWLNBdP190nWxTN7d3+kTGdoy8W4qJt/KCakiryRTT28jQRdKh8HWv+Viln9bsqUKcPcuXM5cODAI69n9aG5ffs2w4YNo0GDBholNZ4elX2pV8yLbw9tJCkt1ejr10s9P+x/hxv3zzGg6RxKumt75a+S/6kib0Rn7t1kWcRB3qgeSDUP492gQ0tubm6MHj2aqlWr8uOPP7J9+/YHr1lKHxpjshEZPeevJ8Yy/8Qeo6//r2OTOXp1My/W/xif0tb7rUexHpoW+fUnZ+WrO9R8FbIeV3sHRtdvq3UUo3JycmL48OH4+vqyYsUKli5dCmRcNTt27FjN+9AYW8NSFelcvjazj+/kRuJ9o603OPJPNpyaTbPKr9C62ttGW6+iPIumRf7PY5P5+eA40tKN/7XY3HZdPcP2qAhG1G1DUSc3reMYnb29PQMHDqRp06bs3bsXgPHjx1OmjGnvbqWVD/w7ka7XMylsk1HWFxl9lCXB46hSPJBXG3xplQemFeukaZHvWnsU+y+uZOaut0hMjdUySp6k6dP5Ingd5d2L0MenidZxTMbW1pbevXszZMgQAIoWLapxItOpUKgofXya8tvZMI7fuZqndd1LvMncPQMo5FScwc3mYWerTatppWDStMj/r8479Gk0jXO3Q5i4pQd34i3rHqSGWn7mEBH3bvKBfyerujl0bggh8PX11TqGWYys1wZPRxe+CFmX62nF1LRk5u4dSJIujqDmi3B3yr8fjIpl0vzAa6OKPRjVehmxybf5dvPzXLxzWOtIORKXmsykw5tpWLICncvX1jqOYkSFHJwY69eO/TcusOnyqRy/X0rJ0uB3iYw+Qp9G0/DyzB8HpxXronmRB6hWohHvtluDo70rU7a/TNiV9VpHMtisYzuITk7gk8Cuap41H3qtWgDVPErwVch6UtNz1op5U/g8gi/9Qbc6Y6jv3dFECRXl2SyiyAOUKlSZ99r9QTnP2szfO4RN4fMs/syby3F3WXRyL70q+1GvmJfWcRQTsLOx5eOALkTGRfPzacPvi3v06lb+OPod/uX+R+daw02YUFGezWKKPIC7U1HeafML/uX+x+ojE/i/kA9I1+u0jvVUEw5txEbYML5BB62jKCbU2ivj6uXpR7YRk5x9G+ZrsWdY/M8IvIvU5q2Gk9Q3PEVTFlXkAextnejXZCadfIay5/wvfL+rL0mpxjtX2VgO3bzE35HHGFKnBWVc1WXp+d0nAV2I06Uw9ci2Zy4XnxLD7N39cLRzIaj5QhzsLLOPvlJwWFyRh4yrDp+vN543Aydy+uY/TNzai7sJeTuNzZj0Us9nwWsp6VKIIbVbah1HMYPqniV5vVogS04f4Ny9J7dgTtfrmL93MPcSbzKk+QI8XSzjXrZKwWaRRT5L08ovM6LVz8QkXmfC5u5cuntc60gA/HnhGEfuXOE9vw642KtznguKMfXb4WJnz1eHnnxiwIrQzzhz6wC9A7+lUjE/M6dTlCez6CIPULNUM8a3+x17W0cmb32RI1GbNc2TlJbKhNAN1Clalp5V6muaRTGvYs5uDK/Xhq1XTrPn2qPtl3edXcquc8toX2MQjSr20CihovyXxRd5gDKFq/Feuz8o41GdeXsGsi3iB83OvFlwYg/XEmL5NLALNsIq/vcpRtS3ZhPKuRXhi+B1pOszes5H3PyH5aGfUqdMG16o967GCRXlUVZTpQo5F2dMm+X4enXgt7DPWR76Ken6nJ23nFc3E+8z+/guOpWvRaNSlcw6tmIZnOzs+cC/I+ExN1hx7hC34y4xf+8QSrpXpF+TmermH4rFMajICyE6CiEihBDnhBDvPWO5nkIIKYQwyT3vHOycGdhsLu1qDGTn2Z+Zu2cAybrsT2kzlolhm9Hp0/nAv5PZxlQsT5cKdQgoUZ4poWuZtbsvEklQi8U427trHU1R/iPbIi+EsAVmA50AH+BVIYTPE5ZzB0YCB40d8mE2woZe9T/kNf+vOXl9F5O39iIm8YYphwTgZPQ1fjsbSt+aTahYqJjJx1MslxCCjwM64Z2+hZv3LzCo6VxKuFfQOpaiPJEhe/KBwDkp5QUpZSqwHOj+hOW+BL4Dko2Y76laVn2DoS1+4Hb8Zb7d3J0rMTnvLWIoKSWfB6/Fw9GZEfXamGwcxXpEXltOSdtrnE73x83tP/s8imIxDGmZWBa48tDjKKDhwwsIIfwAbynlOiHEuKetSAgxEBiY9Xjnzp05CvskrTzeZffdaXy76QWaegRRxqlentf5uNCEW/xz6wJvFa3J4f0m/aJiVYyx/azRxcR9HIhdgJdTc7bEejNq/c8MK+GrdawcKajbriAS2Z2lIoToBXSUUvbPfNwbaCilHJb52AbYDrwtpYwUQuwExkopD2WzXmmsM2TuJd7k+919iLoXzisNPqdV1TeNsl6A1PQ0nvtjOrZCsOX5UdirA2tAxpSFpfcWMoWLdw4zedvLVCpWn1GtlzH1yA5mHN3On12G0KBEea3jGaSgbrv8QggRKqU0+LinIdM1VwHvhx57ZT6XxR2oDewUQkQCjYC/THXw9Uk8XEoy9rmV1CnThl8PfczKsC/R69ONsu4lpw9w8f4dPg7oogp8AReTeIO5ewfi4VyCQc3mYmtjT1CdlpR0dufz4Nz3nFcUUzKkyIcAVYUQFYUQDsArwF9ZL0opY6WUxaSUFaSUFYADQLfs9uSNzcnelSHNFtCmWh+2Rixi3t7BpKQl5mmdMSmJTDuyjRZlqtLGq7qRkirWKDUt+cHZXEEtFuPmWAQAV3tHxjfoQNjty/x18ZjGKRXlv7It8lLKNGAYsAkIB36TUp4UQnwhhOhm6oA5YWNjy8sNPuNlv884dm0rU7a9TGzSk/uMGGL6kW3E6ZL5OKCL6iRYgEkpWXJwHJfvHqdf4xmU9Xj0A79XZT9qFSnNN4c2kJRmuV1TlYLJoPPkpZTrpZTVpJSVpZRfZz73iZTyrycs28rce/GPa1O9D0HNF3Hj/jm+3fw8V+9F5Hgd52Nv83P4fl6tGkDNIqVMkFKxFhtOzSbk8l90rzuOel7t/vO6rY0NnwZ25WrCPRaf2qtBQkV5Oqu54jWn6pZ9jrHP/Ua61DFxa09OXd+do/d/HbIeJzt7xvr995daKTiORG3mz2OTCCzfnY4+QU9drknpynQo58Osozu4lRhnxoSK8mz5tsgDlCtSh/fa/UkxVy9m7XqbPed+Neh9+66dY/OVcIbXbU1xZ3UVY0F19d5pftg/kgpF6tE7cGK2U3Yf+ncmVZ/O5MNbzJRQUbKXr4s8QBHXMoxru4qapZqxLOQ9Vh/5Fr3UP3X5dL2ez0PW4e3mST+fpmZMqliSuORoZu/uh5O9G4ObL8DBzinb91QqXIy3azZm+dkQTt29boaUipK9fF/kAZzs3Rja4gdaVHmDTeFzWbRvGKlpT74w97dzoZy6e50P/DvhZGdv5qSKJUhLT2X+3iHEJt1mSPOFeLoYfkxmZL02FHJw5gt1SqViIQpEkQewtbHjNf+v6FX/I8KurGfq9le4n3znkWXidSlMDNuEf4nydK1QR6OkipaklPwa+glnbx/krYYTqVjUN0fv93B0YbTvc+y9fo5tUadNE1JRcqDAFHnIuNKvXY0BDGo2j6h74Xy3+Xmux/5784c5x3ZyOymeTwLVKZMF1Y6zP7H3/K90rBlEYIXnc7WO3jUaUblwcb4MWY/OSBflKUpuFagin6W+d0fGPvcbqenJTNzSg4ib/3A1/h7zT+7h+Uq++BUvp3VERQPhN/ayMuxL6pZtS/d6T23BlC17G1s+DujM+djbLD19wIgJFSXnsu1dY7KBjdi7JrfuxF/h+119uBl3EVG4B5tuO7K7x1jKunlomssa5Lf+JzfjLvLtpm54uJTi3XZrcLJ3y9P6pJS8umkxJ+5eY2/PsXg4uhgpad7lt21X0Jiid02+VczNm/Htfqe0py/p937jxZJ3KeNaWOtYipklpd5nzu5+CGFLUIvFeS7wkFFIPwnsQmxKEjOObjdCSkXJnQJd5AGc7QsRmtqK21Qn5u46ftg/El26WVriKxZAr09n4T/DuBV3iUHN5lLczXhTdT5FSvNKNX9+Ct/Phdg72b9BUUygwBf5vy8eI/TOVXr5fcUL9d4l+NKfTN/xBvEpd7WOppjB6qMTOHl9F680+JzqJRsbff3j6rfHwcaWrw+tN/q6FcUQBbrIJ6fp+CZ0Az5FSvNyVX86+gQxoOlsIqOP8d3mF7gZd1HriIoJ7b+wii2nF9Kyam9aVn3DJGOUcHFnWN3WbLp8in+unzfJGIryLAW6yC86tY+o+Ht8GtAFW5uM/xX+5boy+rlfSdTd57vNz3P2VrDGKRVTOH/7EMtC3qd6ySa87PepScfqX6sZZV09+CJ4Hen6p19trSimUGCL/O2kOL4/toP23jVpWqbKI69VLtaA99r9gZtjUabveJ3gyD+0CamYxN2Ea8zbOxhPl9IMappx8w9Tcraz533/jpy4e41V58NMOpaiPK7AFvnJYVtITtPxUUDnJ75e3L0877ZbTaVifizeP5J1J2ao087ygZS0RObs6U9qWhJBLRbh6uhhlnG7V6yHX/FyTAzdRIIuxSxjKgoU0CIffvcGv54N4a2ajalUuPhTl3N19GBkq6U0qtCTv45P5eeDY0lLTzVjUsWYpJT8fGAsUTGn6N90FmUKVzPb2EIIPg3sws2kOOYc32W2cRWlwBV5KSVfhqzD3d6JUb7PZbu8na0Dbzeawv/qjGb/xVXM2NmbhNRYMyRVjG3dyZmEXlnHC77vUadMG7OP36BEebpXrMf8E3u4Fn/P7OMrBVOBK/LboyLYfe0s7/g+h6eBVyEKIehaeyR9Gk3jwp0wvtv8ArfjL5s4qWJMYVfW8/fxqTSq0IP2NQZpluN9/45IJN+GbdIsg1KwFKgir9On82XIOioVKsZbNXN+TnSjij0Y2XoZ8SnRfLf5ec7fCTVBSsXYrsSc5Mf9o6lYtD5vBE7QtPmcl5snA2o1Y/X5wxy5fUWzHErBUaCK/LLTBzkXe5uPAjpjb2Obq3VUK9GQd9utwdnenanbXiX08jojp1SM6X7yHebsHoCLQ2GGNJ+PvW32N/8wtWF1W1PMyY3Pg9eqg/mKyRWYIn8vJZGpR7bStHRl2nnXzNO6ShaqxLvt11C+SB0W7Ati46m56pfVAunSU5i3ZxBxKdEEtVhEYeeSWkcCwM3ekXF+7Qm5dYl1l05oHUfJ5wpMkZ95dDv3UpL4JMA4veLdHIvwTpv/I6B8N9Yc/ZZlIe+RrtcZIaliDFJKfj30EefvHOKthpMpX8SybgLzSlV/aniW4puQDSSnqZ8bxXQKRJG/eP8OP4bv5+WqDahVtIzR1mtv60TfxjPoXGs4e88vZ9auPiSl3jfa+pXc2xbxA/su/EbnWsMJKP8/reP8h62NDZ8GduFy/F1+DP9H6zhKPlYgivzXIRuwt7FlnF97o6/bRtjQve5Y3mw4iYib+5m4tSfRCVFGH0cx3Mnru1h15Ct8vTrwvzqjtY7zVM3LVKWtdw1mHt3OnaR4reMo+VS+L/L7b1xg4+WTDKvbipIuhUw2TtNKLzGy1RJiEm/w7ebniYw+arKxlKe7cf88C/cNo2zh6vRpNA0bYdk/4h/5dyYpTcfUI1u1jqLkU5b9G5BHeqnni+B1lHEtzIBazU0+Xo1STXm33WrsbR2ZvO0lDl/ZaPIxlX8lpMYyZ3d/bG3sGNJ8IU72rlpHylYVjxL0rtGIZREHiYi5qXUcJR/K10X+93OHOR59lfcbdMLZzrRNqLKULlyV99r/iZdHTebvHcyW0wvVmTdmkK5PY9G+YdxJuMLgZvMp5uatdSSDjfZ9Dnd7R74MUafjKsaXb4t8oi6Vb8M24VvMm+6V6pp17EJOxRjdZjn1vTux6vBX/Br6Men6NLNmKGh+P/INp27s5jX/L6laIlDrODni6eTKKN/n2Hn1DDuiIrSOo+Qz+bbIzz2xi5uJ9/kssKsm87IOdk4MaDqb9jUHs+vsUubs7keyTh1cM4V951ewLWIxbar1oVnlV7WOkytv1WhMBfeifBmyjjR9utZxlHwkXxb5awmxzD2+m/9VqIt/yfKa5bARNvT0fZ/XAyZw6sYeJm19kZjE65rlyY/O3Q7h/w59SM1SzelV/yOt4+Sag60dHwV05sy9W/xyJkTrOEo+YlCRF0J0FEJECCHOCSHee8Lro4UQp4QQx4QQ24QQ2lVWYGLoJiSSD/w7ahnjgRZVXmN4y5+4E3+Zbzd35/JddZWjMUQnRDFvzyCKunoxoOlsbG3stI6UJx3K+dCoVEUmh23hfqq6mbxiHNkWeSGELTAb6AT4AK8KIXweW+ww4C+lrAusAiYaO6ihjt6JYtX5MPr5NMXbvYhWMf7Dp3QLxrdbjRC2TN72IseubtM6klVL1iUwZ/cA0vQ6hrZYhKtDYa0j5ZkQgk8DuhKTksjMo9u1jqPkE4bsyQcC56SUF6SUqcByoPvDC0gpd0gpEzMfHgC8jBvTMFJKvgheSzEnN4bXba1FhGcq61Gd99v/QclClZmzpz87zvykdSSrpJd6fjowmquxp+nfZBalClXJ/k1Wok6xsrxYxY8fTu3jUly01nGUfMCQIl8WeLgnalTmc0/TD9iQl1C5tf7SCQ7ejGSsXzvcHbTvNvgkhZ1LMva536hbpi3LQz9lRejn6NWBthxZe2I6h6M20tP3A2qXaaV1HKMb36ADtjY2fHNIXWeh5J1RJzGFEG8A/kDLp7w+EBiY9Xjnzp1GG1sn9XwctQcvezdKX0tg53XjrdsUashXSXYVbD/zAxGXDtPEYwh2No5ax8oRY24/Q11OCmbfvdlUdG6G3fUq7Lxh/gzm0NmtPL9HHmfextXUcDL+tKMW207RhsjuQh0hRGPgMyllh8zH7wNIKSc8tlxbYBbQUkp5K9uBhZDGvEho3vHdfHVoPb+070eLslWNtl5T23HmZ1aEfYa3hw/DWv5gMe1wsyOEMPtFXpfvHmfi1l6U86zFO21+xd7Wuj4UcyIpLZUWv0+hhIs7f3cNMuppwFpsO8V4hBChUkp/Q5c35CcnBKgqhKgohHAAXgH+emzQ+sB8oJshBd7YopPjmXF0G228qltVgQdoXe0thjZfxM24C0zY/DxX753WOpJFik26xZw9A3BzLMLgZvPzdYEHcLZz4D3/jhy9E8Xq80e0jqNYsWyLvJQyDRgGbALCgd+klCeFEF8IIbplLjYJcANWCiGOCCH+esrqTGLq4a0kpun4OKCLOYc1mjpln2Nc21VImc7ELT05eX2X1pEsii49hXl7B5GQco+g5gsp5Fxc60hm8UKletQr5sW3oRtJSkvVOo5ipQz6DiilXC+lrCalrCyl/DrzuU+klH9l/rutlLKklNI380+3Z6/ReM7cu8myiGDeqN6Qqh4lzDWs0Xl71uK99n9SzK0c3+/qw+5z/6d1JIsgpeT/Qj7gwp0w3m40hXJFamsdyWxshA2fBnblRuJ95p3YrXUcxUpZ/RWvXwavx9XegTH122odJc88XUozru1KfEq34P9CPuD3w9+gl3qtY2lq6+mF7L+4iq61R9GgnHV+U8uLwJIV6FKhDnOO7+JGorohjZJzVl3kd149w46rEYys14YiTpbfVtYQTvZuBDVfRKuqb7L59HwW7gsiNa1gXv144toOfj86AT/vTnSpPVLrOJr5wL8j6Xo9E0M3aR1FsUJWW+TT9Ol8GbyO8u5FebtmE63jGJWtjR2vNPiCF+t/wuErG5m6/RXuJ93WOpZZXY89y8J/huNVuAZvN5pq8Tf/MKXy7kXp69OUlefCOH7nqtZxFCtjtb85v54JIeLeTT7074SjrXX3LHkSIQRta/RjcPP5RN0L59stz3Mt9ozWscwiIeUec3b3x97GkaAWi3C0c9E6kuZG1GuDp6MLn4esVac/KjlilUX+fmoykw9voWHJinQqX0vrOCbl69WBsc+tRJeewsQtPQm/sVfrSCaVrk9j4b6hRCdeZXDzeRRxfdbF1QVHIQcnxvq148CNi2y6fErrOIoVscoiP+voDu4mJ/JpYBeEEFrHMbkKRevyXvs/8HQpzcydb7Hv/AqtI5nMysNfEn5zL68HfEOV4gFax7Eor1ULoJpHCb4KWU9quroJjWIYqyvyl+PusvjUXnpVqU/dYpr0QdNEUVcvxrddRfWSjVkSPJ4/jk7Md2fe7Dn3KzvO/MRz1fvRtNJLWsexOHY2tnwc0IXIuGh+Pr1f6ziKlbC6Iv/NoQ3Y2tgw3q+D1lHMztmhEMNb/kizyq+y4dRsfvhnJLr0/HHmzZlbB/nl0Ef4lGpJT98PtI5jsVp7VadV2WpMP7KNmOQEreMoVsCqinzIzUjWRh5nSO2WlHa1/v7huWFrY88bARPoUe99Qi7/xbTtrxGfclfrWHlyJ/4K8/cOprhbeQY0nWX1N/8wtY8DuhCvS2XqEXVPAiV7VlPk9VLPZ8FrKeVSiMG1W2gdR1NCCDr4DGZg0zlcjjnBt5uf5+b9C1rHypVkXTxzdvdHr09jaItFuOSDm3+YWnXPkrxePZAlpw9w7p7ZW0UpVsZqivwfF45y9E4U7zXogIu9g9ZxLEKDcl0Y3WY5ybp4vt3yPGduHdQ6Uo7opZ4f9r/DtftnGNB0NiULVdI6ktUYU78tLnb2fHVovdZRFAtnFUU+KS2VCYc2UrdoWXpUrq91HItSqZgf77b/g0JOxZi+43UOXFytdSSD/XV8CkevbubF+h/jU7pgfzvLqaJOboyo14atV06z59pZreMoFswqivz8E3u4nhjLp4FdC/SVj09T3K0c49utoUoxf3488A5/H59u8RfMhFz6iw0nv6dppZdpU62P1nGsUl+fppRzK8LnwetI1+evM60U47H4inkj8T6zj++kc/naNCxVUes4FsvVoTAjWi2hccUXWXtiGj8dGI0uPUXrWE8UGX2Mnw+OpUrxAF7z/6pAXOtgCo62dnwQ0InTMTdYfvaQ1nEUC2XxRX5S2CbS9Xo+8O+kdRSLZ2frwFsNJ9G97lgORK5m5s43SUi5p3WsR8Qm3WTungG4OxZjULN52Nmq4yt50aV8bQJLVmBS2GbiUvPH6bSKcVl0kT8RfZXfzobRx6cpFQoV1TqOVRBC0LnWcPo1nsmFO2F8t+UFbsdd0joWALr0ZObsGUiS7j5BLRZRyKmY1pGsnhCCTwK6cCc5ntnHd2odR7FAFlvkpZR8HrwOT0cXRtRtrXUcqxNYoTvvtPmF+JQYvt3yPOdva/t1XkrJ0uD3iIw+Qp9G0/D29NE0T37iW9ybnpXrs/DkXq7EWfc1E4rxWWyR33z5FPtvXGBM/bYUdnTWOo5VqlI8gHfbr8HFoTBTt79GyKW/NcuyOXw+ByPX0K3OaOp7d9QsR371boOOCATfqp7zymMsssinpqfxZch6qhYuwevVA7WOY9VKulfk3XZrqFC0Hov+Gcb6k9+b/cybY1e3sebotzQo15XOtUaYdeyCooxrYQbXacGfF48SessypucUy2CRRX7J6QNExkXzcWAX7GxstY5j9dwcPRnVehmB5Z/nz2OTWBo8nnS9zixjX4s9w+J/RuDtWYu3G05WZ9KY0JDaLSjp7M5nwarnvPIviyvyMckJTDuylZZlqtK6bDWt4+Qb9raO9G08nS61R7Lvwm/M3PkWiamxJh0zPiWG2bv74WDnzJDmC3GwU9NupuRq78i7DTpw+PYV/rx4VOs4ioWwuCI/7cg24nQpfFxAesWbkxCCbnVG83ajqZy9HczELT25E3/FJGOl63Us2DuEe4k3GNJ8AUVcy5hkHOVRvar4UbtIGSYc2khSmnm+rSmWzaKK/PnY2yw5fYDXqgVSw7OU1nHyrcYVezKy1VJik27y7ebnuRh9xOhjrAj7nIhb+3kj8FsqFfMz+vqVJ7MRNnwS2IWrCfdYdDJ/30VMMYxFFfmvQtbjZGfP2PrttI6S71Uv2Zh326/B0c6ZKdteJuzKBqOte9fZZew6u5R2NQbSuGJPo61XMUyT0pXpUM6H74/t4FZinNZxFI1ZTJHfe+0cW66EM7xua4o5u2kdp0AoVagK77X/E29PHxbsHcLm8Pl5PmAXcXM/y0M/pXaZ1vSo956Rkio59aF/Z1L16Uw6vFnrKIrGLKLIp+v1fB68Fm83T/r5NNU6ToHi7lSUd1r/ip93Z34/8g2/HPqQdH3u7h96O/4y8/cOpqR7Bfo3nomNOjNKM5UKF+Ptmo1ZfuYQp+5e0zqOoiGLKPIrzh0iPOYGH/p3wsnOXus4BY6DnRP9m35PR5+h7D73f8ze3ZckXc6+5ifp4pizux8SSVCLxTg7FDJRWsVQI+u1obCjM18Er1enVBZgmhf5eF0Kk8I2E1CiPF0q1NE6ToFlI2x4od54egd+S/iNvUza0ou7CYbtAer16fywfxQ37p9nYNM5lHCvYNqwikE8HF0Y7fsce6+fY1vUaa3jKBrRvMjPPraT20nxfBLYVZ0yaQGaVX6V4S1/JjrxKt9u6c7lu8ezfc+fxyZz7OpWXvL7hJqlmpkhpWKo3jUaUblwcb4MWY9On651HEUDmhb5qPgYFpzcwwuVfKlf3FvLKMpDfEo35912q7EV9kza+iJHr2596rIHI9ewMXwOzSu/Rquqb5kxpWIIextbPg7ozPnY2yw9fUDrOIoGNC3yE0I3AvB+A9WwytKUKVyN99r/QenCVZm7uz/bI378zzIXo4+w5OC7VC3ekFcafK6+iVmo57xq0LxMFaYe2ca9lESt4yhmZlCRF0J0FEJECCHOCSH+c16cEMJRCLEi8/WDQogKhqz3zwtHGVy7BWXcPHKWWjGLws4lGPvcb9Tzas+KsM9YHvop+oe+8s/dM4DCzsUZ3Fzd/MOSZfSc78r91CSmH9mmdRzFzLIt8kIIW2A20AnwAV4VQjzeDLwfECOlrAJMA74zZPASzu4E1WmZs8SKWTnYOTOo6Vza1RjAjjM/MWfPAOKSowFI1iUwtMVi3ByLaJxSyU7NIqV4pWoAP4Xv1zqKYmZ2BiwTCJyTUl4AEEIsB7oDpx5apjvwWea/VwHfCyGEzOa8rfF+7XG1d8xxaMW8bGxs6VX/I4q7lefX0E/48O+Mg6t9G0+nrEcNjdMphhrn144/LxwBYO7xXdqGUczGkCJfFni4i1UU0PBpy0gp04QQsUBR4M7DCwkhBgIDsx6XuBrPzms7c55a0Yg3LTxGsf/eAgDunXNg57md2kZScqRX4cp8A3x9yHhtLBTLJrK7SEII0QvoKKXsn/m4N9BQSjnsoWVOZC4Tlfn4fOYyd560zsxlstvRVyyUlBIbGxt1gY2VEkKQkJqidQwll1wdHEOllP6GLm/InvxV4OHzG70yn3vSMlFCCDugMBBtaAjFuqizaKyfi706UF5QGHJ2TQhQVQhRUQjhALwC/PXYMn8BWSdJ9wK2q910RVEU7WW7J585xz4M2ATYAj9IKU8KIb4ADkkp/wIWA0uFEOeAu2R8ECiKoigay3ZO3mQDqzl5qyaEUHPyVkptO+smhMjRnLzmvWsURVEU01FFXlEUJR9TRV5RFCUfU0VeURQlHzPkPHlTiRdCRGg4vpI3xYQQT73YTbFoattZt+o5WVjLIh+RkyPEimURQhxS2886qW1n3YQQh3KyvJquURRFycdUkVcURcnHtCzyCzQcW8k7tf2sl9p21i1H20+zK14VRVEU01PTNYqiKPmYKvKKoij5mCryiqIo+Zgq8oqiKPmYWS+GEkJUA8YB5R8eW0rZxpw5lNwTQjQBKvDo9luiWSAlR9T2K3jMfcXrSmAesBBIN/PYSh4JIZYClYEj/Lv9JKCKhBVQ2896CSF6AN8BJQCR+UdKKQtl+15znkKZ2ey+gdkGVIxKCBEO+Ki7vVgntf2sV+Zd9/4npQzP6XvNMicvhCgihCgC/C2ECBJClM56LvN5xTqcAEppHULJNbX9rNfN3BR4MNOevBDiIhlfC8UTXpZSykomD6HkmhDibzK2nzvgCwQDKVmvSym7aZNMMYTaftYrc5oGoCUZH9B/8Oi2W53tOsw8XeMkpUzO7jnFsgghWj7rdSnlLnNlUXJObT/rJYT48RkvSyll32zXYeYiHyal9MvuOcUyCSG+k1K+m91zimVS2896CSGaSin3Zffck5hrTr6UEKIB4CyEqC+E8Mv80wpwMUcGxSjaPeG5TmZPoeSW2n7Wa5aBz/2HuU6h7AC8DXgBUx96Pg74wEwZlFwSQgwBgoBKQohjD73kDmS7J6FoS20/6yWEaAw0AYoLIUY/9FIhwNagdZh5uqanlPJ3sw2oGIUQojDgCUwA3nvopTgp5V1tUimGUtvPemUeT2kFDCbjGqMsccDfUsqz2a7DzEV+9BOejgVCpZRHzBZEyZHsTnNVhcI6PGU7xkkpdWYPo+SIEKK8lPJSrt5r5iL/C+AP/J35VFfgGBmXWa+UUk40WxjFYI+dAlsOiMn8twdwWUpZUbt0iqGEEJGAN49uvxvATWCAlDJUs3DKEz10+usTGXL6q7kblHkBflLKMVLKMUADMi7TbUHGnL1igaSUFTOvZdhKxlV3xaSURcn4kN6sbTolB7YAnR/afp2AtWTM18/RNJnyNJOBKcBFIImMljALgXjgvCErMPee/GmgTtbXQyGEI3BUSllDCHFYSlnfbGGUHBNCHJdS1snuOcUyPWX7HZNS1hVCHJFS+moUTcmGEOKQlNI/u+eexNwNyv4POCiE+DPz8f+AX4QQrsApM2dRcu6aEOIjYFnm49eBaxrmUXLmuhDiXWB55uOXgZtCCFtAr10sxQCuQohKUsoLAEKIioCrIW80+z1ehRABZJwSBLBPSnnIrAGUXMs8cPcpGdNrALuBz9WBV+sghChGxvZrlvnUPuBzMk5+KCelPKdVNuXZhBAdybiB9wUyjqeUBwZJKTdl+14NirwtUJJH+1lfNmsIRVEUK5M5vV0j8+FpKWXKs5Z/8D4zz8kPJ2NP4iYZ/ayzeiLXNVsIJceEENOllKOedqRfNbiyDpk37RnLf28aom7aY6GEEG2klNsfalT2CEMalJl7Tn4kUF1KGW3mcZW8WZr592RNUyh5lXXTnkWom/ZYi5bAdjKOXz5OAhbXhXIH0E5KmWa2QRWjEUI8B/wjpUzSOouSc+qmPQWTuYv8YqA6sI5HeyJPfeqbFIshhPgZaAzcBfaQceB1r5QyRtNgikGEEJ8Bt4A1PPr7pw6cWzghxHngABm/d3uklCcNfq+Zi/ynT3peSvm52UIoeSaEKAP0ImN+t4yU0tzTfkouZF65/Dh10x4rkHnQtSHQHGhKxs7yMSnlC9m916y/nFnFXAjhIqVMNOfYSt4JId4g44esDnAH+J6MPQvFCqj2E1YtHdBl/q0n4xvZLUPeaO49+cbAYsBNSllOCFGPjHM9g8wWQsk1IcQdMi6lngfskFJGaptIyQkhhAswmoxz4gcKIaqScSLEWo2jKdkQQiQCx8lo1b41JyevmLvIHyTja/5fWS0MhBAnpJS1zRZCyRMhRC0yLoZqBlQFIqSUvbVNpRhCCLECCAXelFLWziz6/6h2BpZPCNGdjN+5QCAV+AfYLaXclt17zd2gDCnllceeUqdyWQkhRCEyulCWJ+Nc68Koy+GtSeXMTq86gMwpU6FtJMUQUso/pZTjgEHAejIaOhr0DczcB8yuCCGaAFIIYU/GefPhZs6g5N7eh/58L6WM0jiPkjOpQghnMi9oE0JU5qGzbBTLJYT4HahHxnTpbuBN4KBB7zXzdE0xYAbQlow9iM3ASHVxVP4ghJglpRyudQ7lyYQQ7YCPAB8yfveaAm9LKXdqmUvJnhDCHzgspXzizIcQop2UcssTXzN37xol/xJChEkp/bTOoTydEKIo0IiMnawDUso7GkdSjOBZv3tmma4RQszi2Xc3GWGOHIpSEAkhHv/lv575dzkhRDkpZZi5MylG99RjK+aak1fthBVFO1Oe8ZoEVIMy6/fUnWizFHkp5c+GLKfmdK2eOlPDAkkpWxuy3LPmdRXrZfZTKLPRVOsASp7M0DqAkiffaR1AybXIp72geo4o2TL0jvFSyp/MlUkxCfVNzMI8rY98lqx+8lLKpy6nirxiCNVHvmBQp9pZnif1kc9iUD95Syvyak/CAkkpd2mdQVEKIilln7yuw9KKvJrTtWCZDa0mkHExjVPW86pVbb4RqXUA5emEEF2AWjz6u/dFdu8z13nyak43f/iRjHv0TgNaA32wvIP3ymOMMa+raEsIMQ9wIeP3bhEZjR6DDXqvOa54FUK0fNbrajrAOmTdPk4IcVxKWefh57TOpjydEOLHZ7wspZR9zRZGyRUhxDEpZd2H/nYDNkgpm2f3XnOdJ6+KeP6QIoSwAc4KIYYBVwE3jTMp2TDGvK6iuaz7Kidm3pktGihtyBvNOiev5nSt3kgyvjKOAL4k40rJtzRNpORIbud1Fc2tFUJ4AJOAMDKmvxcZ8kZzd6Hcy79zuv8jc05XSvmJ2UIoeZbZV15KKeO0zqIY7mnzulLKfpoGU7IlhHCUUqZk/ZuMD+nkrOeexdwHzZwz72QipJSXpJSfAV3MnEHJJSGEvxDiOHAMOC6EOCqEUPPx1qOJlPJNICbzfsuNgWoaZ1IMsz/rH1LKFCll7MPPPYu5T6FUc7rW7QcgSEq5B0AI0YyMM27qappKMVSu53UVbQghSgFlAWchRH3+vZaoEBnfyrJl7iKv5nStW3pWgQeQUu4VQqRpGUjJkVzP6yqa6UDGrf68yLiJd5b7wAeGrECTm4aoOV3rJISYDjgDv5JRIF4GkoFlAKovuWXLy7yuoi0hRE8p5e+5eq+ZD7z6k/H13j3zqVigr5Qy1GwhlFwTQux4xstSSqn6kluwJ909SN3NyzpkTtt8DZSRUnYSQvgAjaWUi7N7r7mna9ScrhUztC+5YlmMMa+raO7HzD8fZj4+A6wALK7IqzldKyaEKAl8Qy72JhRN5XleV9FcMSnlb0KI9wGklGlCiCfe1Ptx5i7yu4QQ83l0Tndn1j0o1ZyuxfuJXO5NKNrJvDPbz3mZ11U0l5B5E3YJIIRoRMZ0d7bMPSev5nStmBAiREoZIIQ4LKWsn/ncESmlr8bRFAPkZV5X0VbmjvAsMq5WPgkUB3pJKY9l916z7smrOV2rl+u9CcUi5HpeV9HcKWANkAjEAX+Qsf2yZdYrXoUQJYUQi4UQGzIf+wgh1CXV1mM08BdQWQixD1gCqBuvW49iUsrfAD1kzOsCBs3rKppbAtQg45jYLDKuVF5qyBvNPSf/E2pPwppVBjoB3kBPoCGWd+MZ5enUNzHrVVtK6fPQ4x1CiFOGvNHcvWvUnoR1+1hKeR/wJKPJ1RxgrraRlBzI+iZWSX0TszphmR/KAAghGgKHDHmjuffC1J6Edcv6QO4CLJRSrhNCfKVlICVHcj2vq2iuAfCPEOJy5uNyQERmw0AppXzqtUbmPrsm6whxbeAEOThCrGhPCLGWjKZy7QA/MhpeBUsp62kaTDGIEOI3Ms6N/7/Mp14DPKSUL2qXSjGEEKL8s16XUl562mvm3pNXc7rW7SWgIzBZSnlPCFEaGKdxJsVwuZ7XVbT1rCKeHXPPyas5XSsmpUyUUq6WUp7NfHxdSrlZ61yKwXI9r6tYL3MX+f/M6QIOZs6gKAVV1rxupBAikoybTgQIIY4LIdSUaT5l7qmSq5ltDdoB32W2OzX3B42iFFQdtQ6gmJ+5D7y6kPGDdlxKeTZzTreO+sqvKIpiGprcNERRFEUxDzVVoiiKko+pIq8oipKPqSKvKIqSj6kiryiKko+pIq8oipKP/T/ALWTCwbu6XwAAAABJRU5ErkJggg==\n", 2433 | "text/plain": [ 2434 | "
" 2435 | ] 2436 | }, 2437 | "metadata": { 2438 | "needs_background": "light" 2439 | }, 2440 | "output_type": "display_data" 2441 | } 2442 | ], 2443 | "source": [ 2444 | "x_grp, x_grp_norm=coord_plot(iris, 'cluster')" 2445 | ] 2446 | }, 2447 | { 2448 | "cell_type": "code", 2449 | "execution_count": 46, 2450 | "metadata": {}, 2451 | "outputs": [ 2452 | { 2453 | "data": { 2454 | "text/html": [ 2455 | "
\n", 2456 | "\n", 2469 | "\n", 2470 | " \n", 2471 | " \n", 2472 | " \n", 2473 | " \n", 2474 | " \n", 2475 | " \n", 2476 | " \n", 2477 | " \n", 2478 | " \n", 2479 | " \n", 2480 | " \n", 2481 | " \n", 2482 | " \n", 2483 | " \n", 2484 | " \n", 2485 | " \n", 2486 | " \n", 2487 | " \n", 2488 | " \n", 2489 | " \n", 2490 | " \n", 2491 | " \n", 2492 | " \n", 2493 | " \n", 2494 | " \n", 2495 | " \n", 2496 | " \n", 2497 | " \n", 2498 | " \n", 2499 | " \n", 2500 | " \n", 2501 | " \n", 2502 | " \n", 2503 | " \n", 2504 | " \n", 2505 | " \n", 2506 | "
sepal_lengthsepal_widthpetal_lengthpetal_widthcluster
05.0060003.4280001.4620000.2460000
15.9016132.7483874.3935481.4338711
26.8500003.0736845.7421052.0710532
\n", 2507 | "
" 2508 | ], 2509 | "text/plain": [ 2510 | " sepal_length sepal_width petal_length petal_width cluster\n", 2511 | "0 5.006000 3.428000 1.462000 0.246000 0\n", 2512 | "1 5.901613 2.748387 4.393548 1.433871 1\n", 2513 | "2 6.850000 3.073684 5.742105 2.071053 2" 2514 | ] 2515 | }, 2516 | "execution_count": 46, 2517 | "metadata": {}, 2518 | "output_type": "execute_result" 2519 | } 2520 | ], 2521 | "source": [ 2522 | "x_grp" 2523 | ] 2524 | }, 2525 | { 2526 | "cell_type": "code", 2527 | "execution_count": 47, 2528 | "metadata": {}, 2529 | "outputs": [ 2530 | { 2531 | "data": { 2532 | "text/html": [ 2533 | "
\n", 2534 | "\n", 2547 | "\n", 2548 | " \n", 2549 | " \n", 2550 | " \n", 2551 | " \n", 2552 | " \n", 2553 | " \n", 2554 | " \n", 2555 | " \n", 2556 | " \n", 2557 | " \n", 2558 | " \n", 2559 | " \n", 2560 | " \n", 2561 | " \n", 2562 | " \n", 2563 | " \n", 2564 | " \n", 2565 | " \n", 2566 | " \n", 2567 | " \n", 2568 | " \n", 2569 | " \n", 2570 | " \n", 2571 | " \n", 2572 | " \n", 2573 | " \n", 2574 | " \n", 2575 | " \n", 2576 | " \n", 2577 | " \n", 2578 | " \n", 2579 | " \n", 2580 | " \n", 2581 | " \n", 2582 | " \n", 2583 | " \n", 2584 | "
sepal_lengthsepal_widthpetal_lengthpetal_widthcluster
00.000001.0000000.0000000.0000000
10.485690.0000000.6849240.6508691
21.000000.4786511.0000001.0000002
\n", 2585 | "
" 2586 | ], 2587 | "text/plain": [ 2588 | " sepal_length sepal_width petal_length petal_width cluster\n", 2589 | "0 0.00000 1.000000 0.000000 0.000000 0\n", 2590 | "1 0.48569 0.000000 0.684924 0.650869 1\n", 2591 | "2 1.00000 0.478651 1.000000 1.000000 2" 2592 | ] 2593 | }, 2594 | "execution_count": 47, 2595 | "metadata": {}, 2596 | "output_type": "execute_result" 2597 | } 2598 | ], 2599 | "source": [ 2600 | "x_grp_norm" 2601 | ] 2602 | }, 2603 | { 2604 | "cell_type": "code", 2605 | "execution_count": null, 2606 | "metadata": {}, 2607 | "outputs": [], 2608 | "source": [] 2609 | } 2610 | ], 2611 | "metadata": { 2612 | "kernelspec": { 2613 | "display_name": "Python 3", 2614 | "language": "python", 2615 | "name": "python3" 2616 | }, 2617 | "language_info": { 2618 | "codemirror_mode": { 2619 | "name": "ipython", 2620 | "version": 3 2621 | }, 2622 | "file_extension": ".py", 2623 | "mimetype": "text/x-python", 2624 | "name": "python", 2625 | "nbconvert_exporter": "python", 2626 | "pygments_lexer": "ipython3", 2627 | "version": "3.7.5" 2628 | } 2629 | }, 2630 | "nbformat": 4, 2631 | "nbformat_minor": 4 2632 | } 2633 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "funpymodeling" 3 | version = "0.1.7" 4 | description = "A package designed for data scientists and teachers, to speed up their ML projects, focused on exploratory data analysis, data preparation, and model performance." 5 | license="MIT" 6 | authors = ["Pablo Casas "] 7 | readme = "README.md" 8 | repository = "https://github.com/pablo14/funPyModeling" 9 | documentation = "https://github.com/pablo14/funPyModeling" 10 | 11 | [tool.poetry.dependencies] 12 | python = ">=3.8.1,<4.0" 13 | pandas = "^2.0.2" 14 | numpy = "^1.24.3" 15 | matplotlib = "^3.7.1" 16 | typing-extensions = "^4.6.3" 17 | scikit-learn = "^1.2.2" 18 | seaborn = "^0.12.2" 19 | flake8 = "^6.0.0" 20 | jupyter = "^1.0.0" 21 | pre-commit = "^3.3.2" 22 | pytest = "^7.3.1" 23 | 24 | 25 | [build-system] 26 | requires = ["poetry-core"] 27 | build-backend = "poetry.core.masonry.api" 28 | --------------------------------------------------------------------------------