├── .gitignore
├── .pre-commit-config.yaml
├── CODE_OF_CONDUCT.md
├── MIT-LICENSE.TXT
├── Makefile
├── README.md
├── funpymodeling
│   ├── __init__.py
│   ├── data_prep.py
│   ├── exploratory.py
│   ├── model_validation.py
│   └── test
│       ├── __init__.py
│       └── test_funpymodeling.py
├── notebooks
│   └── quick-start_eng_v1.ipynb
├── poetry.lock
└── pyproject.toml
/.gitignore:
--------------------------------------------------------------------------------
1 | # Custom
2 | my_env/
3 |
4 | # General
5 | syntax: glob
6 | .python-version
7 | .venv
8 | env/*
9 | venv/*
10 | ENV/*
11 | .idea/*
12 | .DS_Store
13 | dython.egg*/*
14 | *run_stuff.py*
15 | build/*
16 | dist/*
17 | build_deploy.sh
18 | site/*
19 | debug.py
20 | AUX/
21 | __pycache__/
22 | *.py[cod]
23 | *$py.class
24 | *.pyc
25 | *.ipynb_checkpoints/
26 | funPyModeling.egg-info/
27 | .ipynb_checkpoints/*
28 | funpymodeling/.ipynb_checkpoints/*
29 | # Distribution / packaging
30 | .Python
31 | build/
32 | develop-eggs/
33 | .pytest_cache/
34 | dist/
35 | downloads/
36 | eggs/
37 | .eggs/
38 | lib/
39 | lib64/
40 | parts/
41 | sdist/
42 | var/
43 | wheels/
44 | *.egg-info/
45 | .installed.cfg
46 | *.egg
47 |
48 | # PyInstaller
49 | # Usually these files are written by a python script from a template
50 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
51 | *.manifest
52 | *.spec
53 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
repos:
  # flake8 is hosted on GitHub (PyCQA/flake8); the former GitLab location is
  # no longer the upstream repository.
  - repo: https://github.com/PyCQA/flake8
    rev: 3.7.9
    hooks:
      - id: flake8
        name: flake8 except __init__.py
        args: [--exclude=__init__.py]
      - id: flake8
        name: flake8 only __init__.py
        args: [--ignore=F401]  # ignore imported unused in __init__.py
        files: __init__.py
  - repo: local
    hooks:
      - id: pytest
        name: Check pytest unit tests pass
        entry: make test
        language: system
        # `make test` always runs the whole suite; without this, pre-commit
        # appends the staged file names to the command as extra make goals.
        pass_filenames: false
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Contributor Covenant Code of Conduct
2 |
3 | ## Our Pledge
4 |
5 | In the interest of fostering an open and welcoming environment, we as
6 | contributors and maintainers pledge to making participation in our project and
7 | our community a harassment-free experience for everyone, regardless of age, body
8 | size, disability, ethnicity, sex characteristics, gender identity and expression,
9 | level of experience, education, socio-economic status, nationality, personal
10 | appearance, race, religion, or sexual identity and orientation.
11 |
12 | ## Our Standards
13 |
14 | Examples of behavior that contributes to creating a positive environment
15 | include:
16 |
17 | * Using welcoming and inclusive language
18 | * Being respectful of differing viewpoints and experiences
19 | * Gracefully accepting constructive criticism
20 | * Focusing on what is best for the community
21 | * Showing empathy towards other community members
22 |
23 | Examples of unacceptable behavior by participants include:
24 |
25 | * The use of sexualized language or imagery and unwelcome sexual attention or
26 | advances
27 | * Trolling, insulting/derogatory comments, and personal or political attacks
28 | * Public or private harassment
29 | * Publishing others' private information, such as a physical or electronic
30 | address, without explicit permission
31 | * Other conduct which could reasonably be considered inappropriate in a
32 | professional setting
33 |
34 | ## Our Responsibilities
35 |
36 | Project maintainers are responsible for clarifying the standards of acceptable
37 | behavior and are expected to take appropriate and fair corrective action in
38 | response to any instances of unacceptable behavior.
39 |
40 | Project maintainers have the right and responsibility to remove, edit, or
41 | reject comments, commits, code, wiki edits, issues, and other contributions
42 | that are not aligned to this Code of Conduct, or to ban temporarily or
43 | permanently any contributor for other behaviors that they deem inappropriate,
44 | threatening, offensive, or harmful.
45 |
46 | ## Scope
47 |
48 | This Code of Conduct applies both within project spaces and in public spaces
49 | when an individual is representing the project or its community. Examples of
50 | representing a project or community include using an official project e-mail
51 | address, posting via an official social media account, or acting as an appointed
52 | representative at an online or offline event. Representation of a project may be
53 | further defined and clarified by project maintainers.
54 |
55 | ## Enforcement
56 |
57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
58 | reported by contacting the project team at pcasas.biz@gmail.com. All
59 | complaints will be reviewed and investigated and will result in a response that
60 | is deemed necessary and appropriate to the circumstances. The project team is
61 | obligated to maintain confidentiality with regard to the reporter of an incident.
62 | Further details of specific enforcement policies may be posted separately.
63 |
64 | Project maintainers who do not follow or enforce the Code of Conduct in good
65 | faith may face temporary or permanent repercussions as determined by other
66 | members of the project's leadership.
67 |
68 | ## Attribution
69 |
70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
72 |
73 | [homepage]: https://www.contributor-covenant.org
74 |
75 | For answers to common questions about this code of conduct, see
76 | https://www.contributor-covenant.org/faq
77 |
--------------------------------------------------------------------------------
/MIT-LICENSE.TXT:
--------------------------------------------------------------------------------
1 | Copyright 2020 Pablo Casas
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining
4 | a copy of this software and associated documentation files (the
5 | "Software"), to deal in the Software without restriction, including
6 | without limitation the rights to use, copy, modify, merge, publish,
7 | distribute, sublicense, and/or sell copies of the Software, and to
8 | permit persons to whom the Software is furnished to do so, subject to
9 | the following conditions:
10 |
11 | The above copyright notice and this permission notice shall be
12 | included in all copies or substantial portions of the Software.
13 |
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
# Developer shortcuts. Every recipe runs through Poetry.
.PHONY: install test check_style

# Install the project and its dependencies.
install:
	poetry install

# Run the unit tests shipped inside the funpymodeling package.
test:
	poetry run pytest --pyargs funpymodeling

# Lint everything except __init__.py, then lint funpymodeling/__init__.py
# on its own while ignoring unused imports (F401).
check_style:
	poetry run flake8 --exclude=__init__.py
	poetry run flake8 --ignore F401 funpymodeling/__init__.py
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # funPyModeling
2 | A package to help data scientists with Exploratory Data Analysis and Data Preparation for ML models.
3 |
--------------------------------------------------------------------------------
/funpymodeling/__init__.py:
--------------------------------------------------------------------------------
1 | from .data_prep import todf
2 | from .exploratory import status, corr_pair, num_vars, cat_vars, profiling_num, freq_tbl
3 | from .model_validation import coord_plot
4 |
5 |
6 | __version__ = "0.1.7"
7 |
--------------------------------------------------------------------------------
/funpymodeling/data_prep.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 |
def todf(data):
    """
    It converts almost any object to a pandas dataframe. It supports: 1D/2D lists, 1D/2D numpy arrays, pandas series and pandas dataframes. If the object has more than 2 dimensions it raises an error.

    Parameters:
    -----------
    data: list, numpy array, pandas series or pandas dataframe.

    Returns:
    --------
    A pandas dataframe:
    - 1D list/array: a single column named 'var' (after `convert_dtypes()`).
    - 2D list/array: one column per array column (after `convert_dtypes()`).
    - pandas series: a single column named after the series.
    - anything else (e.g. a dataframe): the input object itself, untouched.

    Raises:
    -------
    Exception: when the input has more than 2 dimensions.

    Example:
    --------
    >> from numpy import array

    # Different case study:
    >> list_1d = [11, 12, 5, 2]
    >> todf(list_1d)
    >> list_2d = [[11, 12, 5, 2], [15,24, 6,10], [10, 8, 12, 5], [12,15,8,6]]
    >> todf(list_2d)
    >> list_3d = [[[11, 12, 5, 2], [15,24, 6,10], [10, 8, 12, 5], [12,15,8,6]]]
    >> todf(list_3d) # raises: more than 2 dimensions
    >> array_1d = array(list_1d)
    >> todf(array_1d)
    >> array_2d = array(list_2d)
    >> todf(array_2d)
    >> pd_df=pd.DataFrame({'v1':[11, 12, 5, 2], 'v2':[15,24, 6,10]}) # ok
    >> todf(pd_df)
    >> pd_series=pd_df.v1
    >> todf(pd_series)
    """
    # Lists are handled through numpy so that `shape` is available below.
    if isinstance(data, list):
        data = np.array(data)

    if len(data.shape) > 2:
        raise Exception("I live in flattland! (can't handle objects with more than 2 dimensions)")

    if isinstance(data, pd.Series):
        data2 = pd.DataFrame({data.name: data})
    elif isinstance(data, np.ndarray):
        # `shape` is a tuple, so the number of dimensions is its length;
        # comparing the tuple itself against 1 is never true.
        if len(data.shape) == 1:
            data2 = pd.DataFrame({'var': data}).convert_dtypes()
        else:
            data2 = pd.DataFrame(data).convert_dtypes()
    else:
        data2 = data

    return data2
51 |
52 |
--------------------------------------------------------------------------------
/funpymodeling/exploratory.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | from .data_prep import todf
4 |
def status(data):
    """
    Health summary of a dataset, one row per variable: quantity and percentage of missing values (q_nan / p_nan), quantity and percentage of zeros (q_zeros / p_zeros), number of unique values (unique) and data type (type).
    It can be used for EDA or inside a data flow to spot errors or take actions based on the result.

    Parameters:
    -----------
    data: a dataframe, a single column (pandas series), or a 1D/2D numpy array. It is converted with the todf() function.

    Returns:
    --------
    A pandas dataframe with the columns: variable, q_nan, p_nan, q_zeros, p_zeros, unique, type.

    Example:
    --------
    >> import seaborn as sns
    >> iris = sns.load_dataset('iris')
    >> # dataframe as input
    >> status(iris)
    >> # single columns:
    >> status(iris['species'])
    """
    frame = todf(data)
    n_rows = len(frame)

    # missing values per variable; reset_index turns the variable names into a column
    report = frame.isnull().sum().reset_index()
    report.columns = ['variable', 'q_nan']
    report['p_nan'] = report['q_nan'] / n_rows

    # zeros per variable, absolute and relative to the number of rows
    report['q_zeros'] = (frame == 0).sum().values
    report['p_zeros'] = report['q_zeros'] / n_rows

    # distinct values and dtype (as text) per variable
    report['unique'] = frame.nunique().values
    report['type'] = [str(dtype) for dtype in frame.dtypes.values]

    return report
52 |
53 |
def corr_pair(data, method='pearson'):
    """
    Calculate the correlations among all numeric features. Non-numeric features are excluded before calling the `corr` pandas function.
    It's useful to quickly extract those correlated input features and the correlation between the input and the target variable.

    Parameters:
    -----------
    data: pandas data containing the variables to calculate the correlation. It is converted with the todf() function.
    method: `pearson` as default, same as `corr` function in pandas.

    Returns:
    --------
    A pandas dataframe in long format with one row per ordered pair of distinct variables and the columns: v1, v2, R (correlation) and R2 (R squared).

    Example:
    --------
    >> import seaborn as sns
    >> iris = sns.load_dataset('iris')
    >> corr_pair(iris)
    """
    data2 = todf(data)

    # Keep numeric and boolean columns explicitly: recent pandas versions
    # (2.0+) no longer drop non-numeric columns inside `corr` and raise
    # instead. Timedeltas are excluded because they match 'number' in
    # `select_dtypes` but are not numeric features.
    d_num = data2.select_dtypes(include=['number', 'bool'], exclude=['timedelta'])

    d_cor = d_num.corr(method=method)

    d_cor2 = d_cor.reset_index()  # generates index as column

    d_long = d_cor2.melt(id_vars='index')  # to long format, each row 1 pair

    d_long.columns = ['v1', 'v2', 'R']

    d_long['R2'] = d_long['R'] ** 2

    d_long2 = d_long.query("v1 != v2")  # don't need the auto-correlation

    return d_long2
88 |
89 |
def num_vars(data, exclude_var=None):
    """
    Returns the numeric variable names. Useful to use with pipelines or any other method in which we need to keep numeric variables. `exclude_var` can be a list with the variable names to skip in the result. Useful when we want to skip the target variable (i.e. in a data transformation).
    It's also available for categorical variables in the function `cat_vars()`

    Parameters:
    -----------
    data: pandas dataframe
    exclude_var: variable name, or list of variable names, to exclude from the result. Every name must be one of the numeric variables.

    Returns:
    --------
    A pandas Index with all the numeric variable names (any integer or float width, not only 64-bit).

    Example:
    --------
    >> import seaborn as sns
    >> iris = sns.load_dataset('iris')
    >> num_vars(iris)
    """
    # 'number' covers every integer/float width; listing only int64/float64
    # silently dropped int32, float32 and unsigned columns. Timedeltas are
    # excluded explicitly because they also match 'number' in select_dtypes.
    num_v = data.select_dtypes(include=['number'], exclude=['timedelta']).columns
    if exclude_var is not None:
        num_v = num_v.drop(exclude_var)
    return num_v
113 |
def cat_vars(data, exclude_var=None):
    """
    Names of the categorical variables of a dataframe, i.e. every column whose dtype is `object`, `category` or `string`. Handy for pipelines or any step that must keep only categorical variables; `exclude_var` removes names from the result, for example the target variable.
    The numeric counterpart is `num_vars()`.

    Parameters:
    -----------
    data: pandas dataframe
    exclude_var: variable name, or list of variable names, to exclude from the result

    Returns:
    --------
    A pandas Index with all the categoric variable names.

    Example:
    --------
    >> import seaborn as sns
    >> iris = sns.load_dataset('iris')
    >> cat_vars(iris)
    """
    categorical = data.select_dtypes(include=['object','category', 'string'])
    names = categorical.columns
    if exclude_var is None:
        return names
    return names.drop(exclude_var)
138 |
139 |
def profiling_num(data):
    """
    Metric table for all numerical variables; non-numerical variables are skipped automatically (selection done by `num_vars`). Metrics returned: mean, std_dev (standard deviation), variation_coef (std_dev / mean) and the percentiles p_0.01, p_0.05, p_0.25, p_0.5, p_0.75, p_0.95 and p_0.99. Calculations use the pandas defaults for missing values.

    Parameters:
    -----------
    data: pandas series/dataframe, numpy 1D/2D array

    Returns:
    --------
    A dataframe in which each row is an input variable (column `variable`), and each remaining column a statistic.

    Example:
    --------
    >> import seaborn as sns
    >> iris = sns.load_dataset('iris')
    >> profiling_num(iris)
    """
    # normalise the input, then keep only the numeric variables
    frame = todf(data)
    numeric = frame[num_vars(frame)]

    # central tendency / dispersion, indexed by variable name
    summary = pd.DataFrame({'mean': numeric.mean(), 'std_dev': numeric.std()})
    summary['variation_coef'] = summary['std_dev'] / summary['mean']

    # percentiles: one row per variable, one 'p_<q>' column per quantile
    percentiles = numeric.quantile([0.01, 0.05, 0.25, 0.5, 0.75, 0.95, 0.99]).T.add_prefix('p_')

    table = summary.join(percentiles, how='outer')

    # expose the variable name as a regular column and use a plain 0..n-1 index
    table = table.assign(variable=table.index).reset_index(drop=True)

    ordered_cols = ['variable', 'mean', 'std_dev','variation_coef', 'p_0.01', 'p_0.05', 'p_0.25', 'p_0.5', 'p_0.75', 'p_0.95', 'p_0.99']
    return table[ordered_cols]
183 |
184 |
185 |
186 | def _freq_tbl_logic(var, name):
187 | """
188 | For internal use. Related to `freq_tbl`.
189 |
190 | Parameters:
191 | -----------
192 | var: pandas series
193 | name: column name (string)
194 |
195 | Returns:
196 | --------
197 | Dataframe with the metrics
198 |
199 | Example:
200 | --------
201 |
202 | """
203 | cnt=var.value_counts()
204 | df_res=pd.DataFrame({'frequency': var.value_counts(), 'percentage': var.value_counts()/len(var)})
205 | df_res.reset_index(drop=True)
206 |
207 | df_res[name] = df_res.index
208 |
209 | df_res=df_res.reset_index(drop=True)
210 |
211 | df_res['cumulative_perc'] = df_res.percentage.cumsum()/df_res.percentage.sum()
212 |
213 | df_res=df_res[[name, 'frequency', 'percentage', 'cumulative_perc']]
214 |
215 | return df_res
216 |
217 |
218 |
def freq_tbl(data):
    """
    Frequency table for categorical variables: frequency, percentage and cumulative percentage for each categorical variable (numerical ones are skipped, selection done by `cat_vars`).

    Parameters:
    -----------
    data: pandas series/dataframe, numpy 1D/2D array

    Returns:
    --------
    - No categorical variable: the message 'No categorical variables to analyze.'.
    - Exactly one categorical variable: the dataframe with its frequency table (useful inside a process, to take actions based on the result).
    - More than one: nothing is returned; the table of every categorical variable is printed to the console.

    Example:
    --------
    > import seaborn as sns
    > tips=sns.load_dataset('tips')
    > freq_tbl(tips)
    """
    frame = todf(data)
    cat_cols = cat_vars(frame)
    n_cat = len(cat_cols)

    if n_cat == 0:
        return 'No categorical variables to analyze.'

    if n_cat == 1:
        # a single variable: hand the table back to the caller
        only_col = cat_cols[0]
        return _freq_tbl_logic(frame[only_col], name=only_col)

    # several variables: print one table per variable, separated by a rule
    for col in cat_cols:
        print(_freq_tbl_logic(frame[col], name=col))
        print('\n----------------------------------------------------------------\n')
252 |
253 |
--------------------------------------------------------------------------------
/funpymodeling/model_validation.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | from pandas.plotting import parallel_coordinates
4 | from sklearn.preprocessing import MinMaxScaler
5 | import seaborn as sns
6 | import matplotlib.pyplot as plt
7 |
8 |
def coord_plot(data, group_var):
    """
    Coordinate plot analysis for clustering models. Also returns the original and the normalized (min-max) variable table. Useful to extract the main features for each cluster according to the variable means.

    Parameters:
    -----------
    data : Pandas DataFrame containing the variables to analyze the mean across each cluster
    group_var : String indicating the clustering variable name

    Returns:
    --------
    A list with two data frames. The first contains the mean of each numeric variable for each value of the group_var. The second is the same table min-max normalized, range [0-1]. Both keep group_var as their last column.
    It also draws the coordinate (parallel) plot on the current matplotlib figure.

    Example:
    --------
    >> import seaborn as sns
    >> iris = sns.load_dataset('iris')
    # If species is the cluster variable:
    >> coord_plot(iris, 'species')
    """
    # 1- group by cluster, get the means. Only numeric columns can be
    # averaged; asking for them explicitly avoids the error recent pandas
    # versions (2.0+) raise when non-numeric columns are present.
    x_grp = data.groupby(group_var).mean(numeric_only=True)
    x_grp[group_var] = x_grp.index
    x_grp = x_grp.reset_index(drop=True)  # data with the original variables

    # 2- normalizing the data min-max (the cluster label is left out)
    x_grp_no_tgt = x_grp.drop(group_var, axis=1)

    mm_scaler = MinMaxScaler()
    mm_scaler.fit(x_grp_no_tgt)
    x_grp_mm = mm_scaler.transform(x_grp_no_tgt)

    # 3- convert to df
    df_grp_mm = pd.DataFrame(x_grp_mm, columns=x_grp_no_tgt.columns)

    df_grp_mm[group_var] = x_grp[group_var]  # scaled variables plus the cluster label

    # 4- plot
    parallel_coordinates(df_grp_mm, group_var, colormap=plt.get_cmap("Dark2"))
    plt.xticks(rotation=90)

    return [x_grp, df_grp_mm]
--------------------------------------------------------------------------------
/funpymodeling/test/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pablo14/funpymodeling/b399804d5981e8873302980627fb6cc4139d1a10/funpymodeling/test/__init__.py
--------------------------------------------------------------------------------
/funpymodeling/test/test_funpymodeling.py:
--------------------------------------------------------------------------------
1 | from funpymodeling import __version__
2 |
3 |
def test_version():
    """The package must report the release number this test suite was written for."""
    expected = '0.1.7'
    assert __version__ == expected
6 |
--------------------------------------------------------------------------------
/notebooks/quick-start_eng_v1.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# funpymodeling > Basic usage\n",
8 | "\n",
9 | "Created by Pablo Casas [@pabloc_ds](https://twitter.com/pabloc_ds)\n",
10 | "\n",
11 | " "
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 48,
17 | "metadata": {},
18 | "outputs": [],
19 | "source": [
20 | "import pandas as pd\n",
21 | "import matplotlib.pyplot as plt\n",
22 | "import numpy as np\n",
23 | "import seaborn as sns"
24 | ]
25 | },
26 | {
27 | "cell_type": "code",
28 | "execution_count": 49,
29 | "metadata": {},
30 | "outputs": [],
31 | "source": [
32 | "# Load some dataframes for this practice:\n",
33 | "iris = sns.load_dataset('iris')\n",
34 | "tips = sns.load_dataset('tips')"
35 | ]
36 | },
37 | {
38 | "cell_type": "markdown",
39 | "metadata": {},
40 | "source": [
41 | "## 1) Exploratory Data Analysis"
42 | ]
43 | },
44 | {
45 | "cell_type": "markdown",
46 | "metadata": {},
47 | "source": [
48 | "### 1.1) Dataset health `status`"
49 | ]
50 | },
51 | {
52 | "cell_type": "code",
53 | "execution_count": 4,
54 | "metadata": {},
55 | "outputs": [],
56 | "source": [
57 | "from funpymodeling.exploratory import status"
58 | ]
59 | },
60 | {
61 | "cell_type": "markdown",
62 | "metadata": {},
63 | "source": [
64 | "Support data frame as input:"
65 | ]
66 | },
67 | {
68 | "cell_type": "code",
69 | "execution_count": 5,
70 | "metadata": {},
71 | "outputs": [
72 | {
73 | "data": {
74 | "text/html": [
75 | "
\n",
76 | "\n",
89 | "
\n",
90 | " \n",
91 | " \n",
92 | " \n",
93 | " variable \n",
94 | " q_nan \n",
95 | " p_nan \n",
96 | " q_zeros \n",
97 | " p_zeros \n",
98 | " unique \n",
99 | " type \n",
100 | " \n",
101 | " \n",
102 | " \n",
103 | " \n",
104 | " 0 \n",
105 | " sepal_length \n",
106 | " 0 \n",
107 | " 0.0 \n",
108 | " 0 \n",
109 | " 0.0 \n",
110 | " 35 \n",
111 | " float64 \n",
112 | " \n",
113 | " \n",
114 | " 1 \n",
115 | " sepal_width \n",
116 | " 0 \n",
117 | " 0.0 \n",
118 | " 0 \n",
119 | " 0.0 \n",
120 | " 23 \n",
121 | " float64 \n",
122 | " \n",
123 | " \n",
124 | " 2 \n",
125 | " petal_length \n",
126 | " 0 \n",
127 | " 0.0 \n",
128 | " 0 \n",
129 | " 0.0 \n",
130 | " 43 \n",
131 | " float64 \n",
132 | " \n",
133 | " \n",
134 | " 3 \n",
135 | " petal_width \n",
136 | " 0 \n",
137 | " 0.0 \n",
138 | " 0 \n",
139 | " 0.0 \n",
140 | " 22 \n",
141 | " float64 \n",
142 | " \n",
143 | " \n",
144 | " 4 \n",
145 | " species \n",
146 | " 0 \n",
147 | " 0.0 \n",
148 | " 0 \n",
149 | " 0.0 \n",
150 | " 3 \n",
151 | " object \n",
152 | " \n",
153 | " \n",
154 | "
\n",
155 | "
"
156 | ],
157 | "text/plain": [
158 | " variable q_nan p_nan q_zeros p_zeros unique type\n",
159 | "0 sepal_length 0 0.0 0 0.0 35 float64\n",
160 | "1 sepal_width 0 0.0 0 0.0 23 float64\n",
161 | "2 petal_length 0 0.0 0 0.0 43 float64\n",
162 | "3 petal_width 0 0.0 0 0.0 22 float64\n",
163 | "4 species 0 0.0 0 0.0 3 object"
164 | ]
165 | },
166 | "execution_count": 5,
167 | "metadata": {},
168 | "output_type": "execute_result"
169 | }
170 | ],
171 | "source": [
172 | "status(iris) "
173 | ]
174 | },
175 | {
176 | "cell_type": "markdown",
177 | "metadata": {},
178 | "source": [
179 | "Supports Pandas series:"
180 | ]
181 | },
182 | {
183 | "cell_type": "code",
184 | "execution_count": 6,
185 | "metadata": {},
186 | "outputs": [
187 | {
188 | "data": {
189 | "text/html": [
190 | "\n",
191 | "\n",
204 | "
\n",
205 | " \n",
206 | " \n",
207 | " \n",
208 | " variable \n",
209 | " q_nan \n",
210 | " p_nan \n",
211 | " q_zeros \n",
212 | " p_zeros \n",
213 | " unique \n",
214 | " type \n",
215 | " \n",
216 | " \n",
217 | " \n",
218 | " \n",
219 | " 0 \n",
220 | " sepal_width \n",
221 | " 0 \n",
222 | " 0.0 \n",
223 | " 0 \n",
224 | " 0.0 \n",
225 | " 23 \n",
226 | " float64 \n",
227 | " \n",
228 | " \n",
229 | "
\n",
230 | "
"
231 | ],
232 | "text/plain": [
233 | " variable q_nan p_nan q_zeros p_zeros unique type\n",
234 | "0 sepal_width 0 0.0 0 0.0 23 float64"
235 | ]
236 | },
237 | "execution_count": 6,
238 | "metadata": {},
239 | "output_type": "execute_result"
240 | }
241 | ],
242 | "source": [
243 | "status(iris['sepal_width'])"
244 | ]
245 | },
246 | {
247 | "cell_type": "markdown",
248 | "metadata": {},
249 | "source": [
250 | "Supports 2D numpy array:"
251 | ]
252 | },
253 | {
254 | "cell_type": "code",
255 | "execution_count": 7,
256 | "metadata": {},
257 | "outputs": [],
258 | "source": [
259 | "tips_np=tips.to_numpy()"
260 | ]
261 | },
262 | {
263 | "cell_type": "code",
264 | "execution_count": 8,
265 | "metadata": {},
266 | "outputs": [
267 | {
268 | "data": {
269 | "text/html": [
270 | "\n",
271 | "\n",
284 | "
\n",
285 | " \n",
286 | " \n",
287 | " \n",
288 | " variable \n",
289 | " q_nan \n",
290 | " p_nan \n",
291 | " q_zeros \n",
292 | " p_zeros \n",
293 | " unique \n",
294 | " type \n",
295 | " \n",
296 | " \n",
297 | " \n",
298 | " \n",
299 | " 0 \n",
300 | " 0 \n",
301 | " 0 \n",
302 | " 0.0 \n",
303 | " 0 \n",
304 | " 0.0 \n",
305 | " 229 \n",
306 | " float64 \n",
307 | " \n",
308 | " \n",
309 | " 1 \n",
310 | " 1 \n",
311 | " 0 \n",
312 | " 0.0 \n",
313 | " 0 \n",
314 | " 0.0 \n",
315 | " 123 \n",
316 | " float64 \n",
317 | " \n",
318 | " \n",
319 | " 2 \n",
320 | " 2 \n",
321 | " 0 \n",
322 | " 0.0 \n",
323 | " 0 \n",
324 | " 0.0 \n",
325 | " 2 \n",
326 | " string \n",
327 | " \n",
328 | " \n",
329 | " 3 \n",
330 | " 3 \n",
331 | " 0 \n",
332 | " 0.0 \n",
333 | " 0 \n",
334 | " 0.0 \n",
335 | " 2 \n",
336 | " string \n",
337 | " \n",
338 | " \n",
339 | " 4 \n",
340 | " 4 \n",
341 | " 0 \n",
342 | " 0.0 \n",
343 | " 0 \n",
344 | " 0.0 \n",
345 | " 4 \n",
346 | " string \n",
347 | " \n",
348 | " \n",
349 | " 5 \n",
350 | " 5 \n",
351 | " 0 \n",
352 | " 0.0 \n",
353 | " 0 \n",
354 | " 0.0 \n",
355 | " 2 \n",
356 | " string \n",
357 | " \n",
358 | " \n",
359 | " 6 \n",
360 | " 6 \n",
361 | " 0 \n",
362 | " 0.0 \n",
363 | " 0 \n",
364 | " 0.0 \n",
365 | " 6 \n",
366 | " Int64 \n",
367 | " \n",
368 | " \n",
369 | "
\n",
370 | "
"
371 | ],
372 | "text/plain": [
373 | " variable q_nan p_nan q_zeros p_zeros unique type\n",
374 | "0 0 0 0.0 0 0.0 229 float64\n",
375 | "1 1 0 0.0 0 0.0 123 float64\n",
376 | "2 2 0 0.0 0 0.0 2 string\n",
377 | "3 3 0 0.0 0 0.0 2 string\n",
378 | "4 4 0 0.0 0 0.0 4 string\n",
379 | "5 5 0 0.0 0 0.0 2 string\n",
380 | "6 6 0 0.0 0 0.0 6 Int64"
381 | ]
382 | },
383 | "execution_count": 8,
384 | "metadata": {},
385 | "output_type": "execute_result"
386 | }
387 | ],
388 | "source": [
389 | "status(tips_np)"
390 | ]
391 | },
392 | {
393 | "cell_type": "markdown",
394 | "metadata": {},
395 | "source": [
396 | "Note: data types from numpy to pandas dataframe are inferred by pandas `convert_dtypes`."
397 | ]
398 | },
399 | {
400 | "cell_type": "markdown",
401 | "metadata": {},
402 | "source": [
403 | "Supports 1D numpy array:"
404 | ]
405 | },
406 | {
407 | "cell_type": "code",
408 | "execution_count": 9,
409 | "metadata": {},
410 | "outputs": [
411 | {
412 | "data": {
413 | "text/html": [
414 | "\n",
415 | "\n",
428 | "
\n",
429 | " \n",
430 | " \n",
431 | " \n",
432 | " variable \n",
433 | " q_nan \n",
434 | " p_nan \n",
435 | " q_zeros \n",
436 | " p_zeros \n",
437 | " unique \n",
438 | " type \n",
439 | " \n",
440 | " \n",
441 | " \n",
442 | " \n",
443 | " 0 \n",
444 | " 0 \n",
445 | " 0 \n",
446 | " 0.0 \n",
447 | " 0 \n",
448 | " 0.0 \n",
449 | " 4 \n",
450 | " string \n",
451 | " \n",
452 | " \n",
453 | "
\n",
454 | "
"
455 | ],
456 | "text/plain": [
457 | " variable q_nan p_nan q_zeros p_zeros unique type\n",
458 | "0 0 0 0.0 0 0.0 4 string"
459 | ]
460 | },
461 | "execution_count": 9,
462 | "metadata": {},
463 | "output_type": "execute_result"
464 | }
465 | ],
466 | "source": [
467 | "status(tips_np[:,4])"
468 | ]
469 | },
470 | {
471 | "cell_type": "code",
472 | "execution_count": null,
473 | "metadata": {},
474 | "outputs": [],
475 | "source": [
476 | "\n",
477 | "\n",
478 | "\n",
479 | "\n",
480 | "\n",
481 | "\n",
482 | "\n",
483 | "\n"
484 | ]
485 | },
486 | {
487 | "cell_type": "markdown",
488 | "metadata": {},
489 | "source": [
490 | "### 1.2) Univariate analysis in numeric variables"
491 | ]
492 | },
493 | {
494 | "cell_type": "code",
495 | "execution_count": 11,
496 | "metadata": {},
497 | "outputs": [],
498 | "source": [
499 | "from funpymodeling.exploratory import profiling_num"
500 | ]
501 | },
502 | {
503 | "cell_type": "markdown",
504 | "metadata": {},
505 | "source": [
506 | "`profiling_num` retrieves several statistics for all numeric variables excluding the categorical ones."
507 | ]
508 | },
509 | {
510 | "cell_type": "markdown",
511 | "metadata": {},
512 | "source": [
513 | "Supports dataframe:"
514 | ]
515 | },
516 | {
517 | "cell_type": "code",
518 | "execution_count": 12,
519 | "metadata": {},
520 | "outputs": [
521 | {
522 | "data": {
523 | "text/html": [
524 | "\n",
525 | "\n",
538 | "
\n",
539 | " \n",
540 | " \n",
541 | " \n",
542 | " variable \n",
543 | " mean \n",
544 | " std_dev \n",
545 | " variation_coef \n",
546 | " p_0.01 \n",
547 | " p_0.05 \n",
548 | " p_0.25 \n",
549 | " p_0.5 \n",
550 | " p_0.75 \n",
551 | " p_0.95 \n",
552 | " p_0.99 \n",
553 | " \n",
554 | " \n",
555 | " \n",
556 | " \n",
557 | " 0 \n",
558 | " total_bill \n",
559 | " 19.785943 \n",
560 | " 8.902412 \n",
561 | " 0.449936 \n",
562 | " 7.25 \n",
563 | " 9.5575 \n",
564 | " 13.3475 \n",
565 | " 17.795 \n",
566 | " 24.1275 \n",
567 | " 38.0610 \n",
568 | " 48.2270 \n",
569 | " \n",
570 | " \n",
571 | " 1 \n",
572 | " tip \n",
573 | " 2.998279 \n",
574 | " 1.383638 \n",
575 | " 0.461478 \n",
576 | " 1.00 \n",
577 | " 1.4400 \n",
578 | " 2.0000 \n",
579 | " 2.900 \n",
580 | " 3.5625 \n",
581 | " 5.1955 \n",
582 | " 7.2145 \n",
583 | " \n",
584 | " \n",
585 | " 2 \n",
586 | " size \n",
587 | " 2.569672 \n",
588 | " 0.951100 \n",
589 | " 0.370125 \n",
590 | " 1.00 \n",
591 | " 2.0000 \n",
592 | " 2.0000 \n",
593 | " 2.000 \n",
594 | " 3.0000 \n",
595 | " 4.0000 \n",
596 | " 6.0000 \n",
597 | " \n",
598 | " \n",
599 | "
\n",
600 | "
"
601 | ],
602 | "text/plain": [
603 | " variable mean std_dev variation_coef p_0.01 p_0.05 p_0.25 \\\n",
604 | "0 total_bill 19.785943 8.902412 0.449936 7.25 9.5575 13.3475 \n",
605 | "1 tip 2.998279 1.383638 0.461478 1.00 1.4400 2.0000 \n",
606 | "2 size 2.569672 0.951100 0.370125 1.00 2.0000 2.0000 \n",
607 | "\n",
608 | " p_0.5 p_0.75 p_0.95 p_0.99 \n",
609 | "0 17.795 24.1275 38.0610 48.2270 \n",
610 | "1 2.900 3.5625 5.1955 7.2145 \n",
611 | "2 2.000 3.0000 4.0000 6.0000 "
612 | ]
613 | },
614 | "execution_count": 12,
615 | "metadata": {},
616 | "output_type": "execute_result"
617 | }
618 | ],
619 | "source": [
620 | "profiling_num(tips)"
621 | ]
622 | },
623 | {
624 | "cell_type": "markdown",
625 | "metadata": {},
626 | "source": [
627 | "Also numpy as before:"
628 | ]
629 | },
630 | {
631 | "cell_type": "code",
632 | "execution_count": 13,
633 | "metadata": {},
634 | "outputs": [
635 | {
636 | "data": {
637 | "text/html": [
638 | "\n",
639 | "\n",
652 | "
\n",
653 | " \n",
654 | " \n",
655 | " \n",
656 | " variable \n",
657 | " mean \n",
658 | " std_dev \n",
659 | " variation_coef \n",
660 | " p_0.01 \n",
661 | " p_0.05 \n",
662 | " p_0.25 \n",
663 | " p_0.5 \n",
664 | " p_0.75 \n",
665 | " p_0.95 \n",
666 | " p_0.99 \n",
667 | " \n",
668 | " \n",
669 | " \n",
670 | " \n",
671 | " 0 \n",
672 | " 0 \n",
673 | " 19.785943 \n",
674 | " 8.902412 \n",
675 | " 0.449936 \n",
676 | " 7.25 \n",
677 | " 9.5575 \n",
678 | " 13.3475 \n",
679 | " 17.795 \n",
680 | " 24.1275 \n",
681 | " 38.061 \n",
682 | " 48.227 \n",
683 | " \n",
684 | " \n",
685 | " 1 \n",
686 | " 1 \n",
687 | " 2.998279 \n",
688 | " 1.383638 \n",
689 | " 0.461478 \n",
690 | " 1 \n",
691 | " 1.44 \n",
692 | " 2 \n",
693 | " 2.9 \n",
694 | " 3.5625 \n",
695 | " 5.1955 \n",
696 | " 7.2145 \n",
697 | " \n",
698 | " \n",
699 | " 2 \n",
700 | " 6 \n",
701 | " 2.569672 \n",
702 | " 0.951100 \n",
703 | " 0.370125 \n",
704 | " 1 \n",
705 | " 2 \n",
706 | " 2 \n",
707 | " 2 \n",
708 | " 3 \n",
709 | " 4 \n",
710 | " 6 \n",
711 | " \n",
712 | " \n",
713 | "
\n",
714 | "
"
715 | ],
716 | "text/plain": [
717 | " variable mean std_dev variation_coef p_0.01 p_0.05 p_0.25 \\\n",
718 | "0 0 19.785943 8.902412 0.449936 7.25 9.5575 13.3475 \n",
719 | "1 1 2.998279 1.383638 0.461478 1 1.44 2 \n",
720 | "2 6 2.569672 0.951100 0.370125 1 2 2 \n",
721 | "\n",
722 | " p_0.5 p_0.75 p_0.95 p_0.99 \n",
723 | "0 17.795 24.1275 38.061 48.227 \n",
724 | "1 2.9 3.5625 5.1955 7.2145 \n",
725 | "2 2 3 4 6 "
726 | ]
727 | },
728 | "execution_count": 13,
729 | "metadata": {},
730 | "output_type": "execute_result"
731 | }
732 | ],
733 | "source": [
734 | "profiling_num(tips_np)"
735 | ]
736 | },
737 | {
738 | "cell_type": "markdown",
739 | "metadata": {},
740 | "source": [
741 | "Pandas series & 1D array:"
742 | ]
743 | },
744 | {
745 | "cell_type": "code",
746 | "execution_count": 14,
747 | "metadata": {},
748 | "outputs": [
749 | {
750 | "data": {
751 | "text/html": [
752 | "\n",
753 | "\n",
766 | "
\n",
767 | " \n",
768 | " \n",
769 | " \n",
770 | " variable \n",
771 | " mean \n",
772 | " std_dev \n",
773 | " variation_coef \n",
774 | " p_0.01 \n",
775 | " p_0.05 \n",
776 | " p_0.25 \n",
777 | " p_0.5 \n",
778 | " p_0.75 \n",
779 | " p_0.95 \n",
780 | " p_0.99 \n",
781 | " \n",
782 | " \n",
783 | " \n",
784 | " \n",
785 | " 0 \n",
786 | " total_bill \n",
787 | " 19.785943 \n",
788 | " 8.902412 \n",
789 | " 0.449936 \n",
790 | " 7.25 \n",
791 | " 9.5575 \n",
792 | " 13.3475 \n",
793 | " 17.795 \n",
794 | " 24.1275 \n",
795 | " 38.061 \n",
796 | " 48.227 \n",
797 | " \n",
798 | " \n",
799 | "
\n",
800 | "
"
801 | ],
802 | "text/plain": [
803 | " variable mean std_dev variation_coef p_0.01 p_0.05 p_0.25 \\\n",
804 | "0 total_bill 19.785943 8.902412 0.449936 7.25 9.5575 13.3475 \n",
805 | "\n",
806 | " p_0.5 p_0.75 p_0.95 p_0.99 \n",
807 | "0 17.795 24.1275 38.061 48.227 "
808 | ]
809 | },
810 | "execution_count": 14,
811 | "metadata": {},
812 | "output_type": "execute_result"
813 | }
814 | ],
815 | "source": [
816 | "profiling_num(tips['total_bill'])"
817 | ]
818 | },
819 | {
820 | "cell_type": "code",
821 | "execution_count": 15,
822 | "metadata": {},
823 | "outputs": [
824 | {
825 | "data": {
826 | "text/html": [
827 | "\n",
828 | "\n",
841 | "
\n",
842 | " \n",
843 | " \n",
844 | " \n",
845 | " variable \n",
846 | " mean \n",
847 | " std_dev \n",
848 | " variation_coef \n",
849 | " p_0.01 \n",
850 | " p_0.05 \n",
851 | " p_0.25 \n",
852 | " p_0.5 \n",
853 | " p_0.75 \n",
854 | " p_0.95 \n",
855 | " p_0.99 \n",
856 | " \n",
857 | " \n",
858 | " \n",
859 | " \n",
860 | " 0 \n",
861 | " 0 \n",
862 | " 19.785943 \n",
863 | " 8.902412 \n",
864 | " 0.449936 \n",
865 | " 7.25 \n",
866 | " 9.5575 \n",
867 | " 13.3475 \n",
868 | " 17.795 \n",
869 | " 24.1275 \n",
870 | " 38.061 \n",
871 | " 48.227 \n",
872 | " \n",
873 | " \n",
874 | "
\n",
875 | "
"
876 | ],
877 | "text/plain": [
878 | " variable mean std_dev variation_coef p_0.01 p_0.05 p_0.25 \\\n",
879 | "0 0 19.785943 8.902412 0.449936 7.25 9.5575 13.3475 \n",
880 | "\n",
881 | " p_0.5 p_0.75 p_0.95 p_0.99 \n",
882 | "0 17.795 24.1275 38.061 48.227 "
883 | ]
884 | },
885 | "execution_count": 15,
886 | "metadata": {},
887 | "output_type": "execute_result"
888 | }
889 | ],
890 | "source": [
891 | "profiling_num(tips_np[:,0])"
892 | ]
893 | },
894 | {
895 | "cell_type": "code",
896 | "execution_count": null,
897 | "metadata": {},
898 | "outputs": [],
899 | "source": [
900 | "\n",
901 | "\n",
902 | "\n",
903 | "\n",
904 | "\n",
905 | "\n",
906 | "\n",
907 | "\n",
908 | "\n"
909 | ]
910 | },
911 | {
912 | "cell_type": "markdown",
913 | "metadata": {},
914 | "source": [
915 | "### 1.3) Univariate analysis in categorical variables"
916 | ]
917 | },
918 | {
919 | "cell_type": "code",
920 | "execution_count": 16,
921 | "metadata": {},
922 | "outputs": [],
923 | "source": [
924 | "from funpymodeling.exploratory import freq_tbl"
925 | ]
926 | },
927 | {
928 | "cell_type": "markdown",
929 | "metadata": {},
930 | "source": [
931 | "It retrieves several statistics related to categorical variables, such as frequency, percentage and cumulative percentage.\n",
932 | "\n",
933 | "It will run for all categorical variables excluding all the other ones."
934 | ]
935 | },
936 | {
937 | "cell_type": "markdown",
938 | "metadata": {},
939 | "source": [
940 | "Just like the others, it supports: pandas dataframe, pandas series and 1D/2D numpy arrays"
941 | ]
942 | },
943 | {
944 | "cell_type": "code",
945 | "execution_count": 17,
946 | "metadata": {},
947 | "outputs": [
948 | {
949 | "name": "stdout",
950 | "output_type": "stream",
951 | "text": [
952 | " sex frequency percentage cumulative_perc\n",
953 | "0 Male 157 0.643443 0.643443\n",
954 | "1 Female 87 0.356557 1.000000\n",
955 | "\n",
956 | "----------------------------------------------------------------\n",
957 | "\n",
958 | " smoker frequency percentage cumulative_perc\n",
959 | "0 No 151 0.618852 0.618852\n",
960 | "1 Yes 93 0.381148 1.000000\n",
961 | "\n",
962 | "----------------------------------------------------------------\n",
963 | "\n",
964 | " day frequency percentage cumulative_perc\n",
965 | "0 Sat 87 0.356557 0.356557\n",
966 | "1 Sun 76 0.311475 0.668033\n",
967 | "2 Thur 62 0.254098 0.922131\n",
968 | "3 Fri 19 0.077869 1.000000\n",
969 | "\n",
970 | "----------------------------------------------------------------\n",
971 | "\n",
972 | " time frequency percentage cumulative_perc\n",
973 | "0 Dinner 176 0.721311 0.721311\n",
974 | "1 Lunch 68 0.278689 1.000000\n",
975 | "\n",
976 | "----------------------------------------------------------------\n",
977 | "\n"
978 | ]
979 | }
980 | ],
981 | "source": [
982 | "freq_tbl(tips)"
983 | ]
984 | },
985 | {
986 | "cell_type": "markdown",
987 | "metadata": {},
988 | "source": [
989 | "If 1 variable is provided, it returns the table associated with that variable so we can use it in our data pipeline:"
990 | ]
991 | },
992 | {
993 | "cell_type": "code",
994 | "execution_count": 18,
995 | "metadata": {},
996 | "outputs": [
997 | {
998 | "data": {
999 | "text/html": [
1000 | "\n",
1001 | "\n",
1014 | "
\n",
1015 | " \n",
1016 | " \n",
1017 | " \n",
1018 | " day \n",
1019 | " frequency \n",
1020 | " percentage \n",
1021 | " cumulative_perc \n",
1022 | " \n",
1023 | " \n",
1024 | " \n",
1025 | " \n",
1026 | " 0 \n",
1027 | " Sat \n",
1028 | " 87 \n",
1029 | " 0.356557 \n",
1030 | " 0.356557 \n",
1031 | " \n",
1032 | " \n",
1033 | " 1 \n",
1034 | " Sun \n",
1035 | " 76 \n",
1036 | " 0.311475 \n",
1037 | " 0.668033 \n",
1038 | " \n",
1039 | " \n",
1040 | " 2 \n",
1041 | " Thur \n",
1042 | " 62 \n",
1043 | " 0.254098 \n",
1044 | " 0.922131 \n",
1045 | " \n",
1046 | " \n",
1047 | " 3 \n",
1048 | " Fri \n",
1049 | " 19 \n",
1050 | " 0.077869 \n",
1051 | " 1.000000 \n",
1052 | " \n",
1053 | " \n",
1054 | "
\n",
1055 | "
"
1056 | ],
1057 | "text/plain": [
1058 | " day frequency percentage cumulative_perc\n",
1059 | "0 Sat 87 0.356557 0.356557\n",
1060 | "1 Sun 76 0.311475 0.668033\n",
1061 | "2 Thur 62 0.254098 0.922131\n",
1062 | "3 Fri 19 0.077869 1.000000"
1063 | ]
1064 | },
1065 | "execution_count": 18,
1066 | "metadata": {},
1067 | "output_type": "execute_result"
1068 | }
1069 | ],
1070 | "source": [
1071 | "day_freq=freq_tbl(tips['day'])\n",
1072 | "\n",
1073 | "day_freq"
1074 | ]
1075 | },
1076 | {
1077 | "cell_type": "markdown",
1078 | "metadata": {},
1079 | "source": [
1080 | "Days with high representativity (more than 30%):"
1081 | ]
1082 | },
1083 | {
1084 | "cell_type": "code",
1085 | "execution_count": 19,
1086 | "metadata": {},
1087 | "outputs": [
1088 | {
1089 | "data": {
1090 | "text/html": [
1091 | "\n",
1092 | "\n",
1105 | "
\n",
1106 | " \n",
1107 | " \n",
1108 | " \n",
1109 | " day \n",
1110 | " frequency \n",
1111 | " percentage \n",
1112 | " cumulative_perc \n",
1113 | " \n",
1114 | " \n",
1115 | " \n",
1116 | " \n",
1117 | " 0 \n",
1118 | " Sat \n",
1119 | " 87 \n",
1120 | " 0.356557 \n",
1121 | " 0.356557 \n",
1122 | " \n",
1123 | " \n",
1124 | " 1 \n",
1125 | " Sun \n",
1126 | " 76 \n",
1127 | " 0.311475 \n",
1128 | " 0.668033 \n",
1129 | " \n",
1130 | " \n",
1131 | "
\n",
1132 | "
"
1133 | ],
1134 | "text/plain": [
1135 | " day frequency percentage cumulative_perc\n",
1136 | "0 Sat 87 0.356557 0.356557\n",
1137 | "1 Sun 76 0.311475 0.668033"
1138 | ]
1139 | },
1140 | "execution_count": 19,
1141 | "metadata": {},
1142 | "output_type": "execute_result"
1143 | }
1144 | ],
1145 | "source": [
1146 | "day_freq[day_freq['percentage']>0.3]"
1147 | ]
1148 | },
1149 | {
1150 | "cell_type": "code",
1151 | "execution_count": 20,
1152 | "metadata": {},
1153 | "outputs": [
1154 | {
1155 | "data": {
1156 | "text/plain": [
1157 | "0 Sat\n",
1158 | "1 Sun\n",
1159 | "Name: day, dtype: category\n",
1160 | "Categories (4, object): ['Thur', 'Fri', 'Sat', 'Sun']"
1161 | ]
1162 | },
1163 | "execution_count": 20,
1164 | "metadata": {},
1165 | "output_type": "execute_result"
1166 | }
1167 | ],
1168 | "source": [
1169 | "day_freq[day_freq['percentage']>0.3]['day']"
1170 | ]
1171 | },
1172 | {
1173 | "cell_type": "code",
1174 | "execution_count": null,
1175 | "metadata": {},
1176 | "outputs": [],
1177 | "source": [
1178 | "\n",
1179 | "\n",
1180 | "\n",
1181 | "\n",
1182 | "\n",
1183 | "\n",
1184 | "\n",
1185 | "\n",
1186 | "\n"
1187 | ]
1188 | },
1189 | {
1190 | "cell_type": "markdown",
1191 | "metadata": {},
1192 | "source": [
1193 | "### 1.4) Pairwise correlation analysis"
1194 | ]
1195 | },
1196 | {
1197 | "cell_type": "code",
1198 | "execution_count": 21,
1199 | "metadata": {},
1200 | "outputs": [],
1201 | "source": [
1202 | "from funpymodeling.exploratory import corr_pair"
1203 | ]
1204 | },
1205 | {
1206 | "cell_type": "markdown",
1207 | "metadata": {},
1208 | "source": [
1209 | "A wrapper around `corr` of pandas that allows us to quickly filter the most important variables, or not. \n",
1210 | "\n",
1211 | "Useful in EDA and when doing the features pre-selection before creating the predictive model."
1212 | ]
1213 | },
1214 | {
1215 | "cell_type": "code",
1216 | "execution_count": 22,
1217 | "metadata": {},
1218 | "outputs": [
1219 | {
1220 | "data": {
1221 | "text/html": [
1222 | "\n",
1223 | "\n",
1236 | "
\n",
1237 | " \n",
1238 | " \n",
1239 | " \n",
1240 | " variable \n",
1241 | " q_nan \n",
1242 | " p_nan \n",
1243 | " q_zeros \n",
1244 | " p_zeros \n",
1245 | " unique \n",
1246 | " type \n",
1247 | " \n",
1248 | " \n",
1249 | " \n",
1250 | " \n",
1251 | " 0 \n",
1252 | " carat \n",
1253 | " 0 \n",
1254 | " 0.0 \n",
1255 | " 0 \n",
1256 | " 0.000000 \n",
1257 | " 273 \n",
1258 | " float64 \n",
1259 | " \n",
1260 | " \n",
1261 | " 1 \n",
1262 | " cut \n",
1263 | " 0 \n",
1264 | " 0.0 \n",
1265 | " 0 \n",
1266 | " 0.000000 \n",
1267 | " 5 \n",
1268 | " object \n",
1269 | " \n",
1270 | " \n",
1271 | " 2 \n",
1272 | " color \n",
1273 | " 0 \n",
1274 | " 0.0 \n",
1275 | " 0 \n",
1276 | " 0.000000 \n",
1277 | " 7 \n",
1278 | " object \n",
1279 | " \n",
1280 | " \n",
1281 | " 3 \n",
1282 | " clarity \n",
1283 | " 0 \n",
1284 | " 0.0 \n",
1285 | " 0 \n",
1286 | " 0.000000 \n",
1287 | " 8 \n",
1288 | " object \n",
1289 | " \n",
1290 | " \n",
1291 | " 4 \n",
1292 | " depth \n",
1293 | " 0 \n",
1294 | " 0.0 \n",
1295 | " 0 \n",
1296 | " 0.000000 \n",
1297 | " 184 \n",
1298 | " float64 \n",
1299 | " \n",
1300 | " \n",
1301 | " 5 \n",
1302 | " table \n",
1303 | " 0 \n",
1304 | " 0.0 \n",
1305 | " 0 \n",
1306 | " 0.000000 \n",
1307 | " 127 \n",
1308 | " float64 \n",
1309 | " \n",
1310 | " \n",
1311 | " 6 \n",
1312 | " price \n",
1313 | " 0 \n",
1314 | " 0.0 \n",
1315 | " 0 \n",
1316 | " 0.000000 \n",
1317 | " 11602 \n",
1318 | " int64 \n",
1319 | " \n",
1320 | " \n",
1321 | " 7 \n",
1322 | " x \n",
1323 | " 0 \n",
1324 | " 0.0 \n",
1325 | " 8 \n",
1326 | " 0.000148 \n",
1327 | " 554 \n",
1328 | " float64 \n",
1329 | " \n",
1330 | " \n",
1331 | " 8 \n",
1332 | " y \n",
1333 | " 0 \n",
1334 | " 0.0 \n",
1335 | " 7 \n",
1336 | " 0.000130 \n",
1337 | " 552 \n",
1338 | " float64 \n",
1339 | " \n",
1340 | " \n",
1341 | " 9 \n",
1342 | " z \n",
1343 | " 0 \n",
1344 | " 0.0 \n",
1345 | " 20 \n",
1346 | " 0.000371 \n",
1347 | " 375 \n",
1348 | " float64 \n",
1349 | " \n",
1350 | " \n",
1351 | "
\n",
1352 | "
"
1353 | ],
1354 | "text/plain": [
1355 | " variable q_nan p_nan q_zeros p_zeros unique type\n",
1356 | "0 carat 0 0.0 0 0.000000 273 float64\n",
1357 | "1 cut 0 0.0 0 0.000000 5 object\n",
1358 | "2 color 0 0.0 0 0.000000 7 object\n",
1359 | "3 clarity 0 0.0 0 0.000000 8 object\n",
1360 | "4 depth 0 0.0 0 0.000000 184 float64\n",
1361 | "5 table 0 0.0 0 0.000000 127 float64\n",
1362 | "6 price 0 0.0 0 0.000000 11602 int64\n",
1363 | "7 x 0 0.0 8 0.000148 554 float64\n",
1364 | "8 y 0 0.0 7 0.000130 552 float64\n",
1365 | "9 z 0 0.0 20 0.000371 375 float64"
1366 | ]
1367 | },
1368 | "execution_count": 22,
1369 | "metadata": {},
1370 | "output_type": "execute_result"
1371 | }
1372 | ],
1373 | "source": [
1374 | "diamonds = sns.load_dataset('diamonds')\n",
1375 | "\n",
1376 | "status(diamonds)"
1377 | ]
1378 | },
1379 | {
1380 | "cell_type": "code",
1381 | "execution_count": 23,
1382 | "metadata": {},
1383 | "outputs": [
1384 | {
1385 | "data": {
1386 | "text/html": [
1387 | "\n",
1388 | "\n",
1401 | "
\n",
1402 | " \n",
1403 | " \n",
1404 | " \n",
1405 | " v1 \n",
1406 | " v2 \n",
1407 | " R \n",
1408 | " R2 \n",
1409 | " \n",
1410 | " \n",
1411 | " \n",
1412 | " \n",
1413 | " 1 \n",
1414 | " depth \n",
1415 | " carat \n",
1416 | " 0.028224 \n",
1417 | " 0.000797 \n",
1418 | " \n",
1419 | " \n",
1420 | " 2 \n",
1421 | " table \n",
1422 | " carat \n",
1423 | " 0.181618 \n",
1424 | " 0.032985 \n",
1425 | " \n",
1426 | " \n",
1427 | " 3 \n",
1428 | " price \n",
1429 | " carat \n",
1430 | " 0.921591 \n",
1431 | " 0.849331 \n",
1432 | " \n",
1433 | " \n",
1434 | " 4 \n",
1435 | " x \n",
1436 | " carat \n",
1437 | " 0.975094 \n",
1438 | " 0.950809 \n",
1439 | " \n",
1440 | " \n",
1441 | " 5 \n",
1442 | " y \n",
1443 | " carat \n",
1444 | " 0.951722 \n",
1445 | " 0.905775 \n",
1446 | " \n",
1447 | " \n",
1448 | " 6 \n",
1449 | " z \n",
1450 | " carat \n",
1451 | " 0.953387 \n",
1452 | " 0.908947 \n",
1453 | " \n",
1454 | " \n",
1455 | " 7 \n",
1456 | " carat \n",
1457 | " depth \n",
1458 | " 0.028224 \n",
1459 | " 0.000797 \n",
1460 | " \n",
1461 | " \n",
1462 | " 9 \n",
1463 | " table \n",
1464 | " depth \n",
1465 | " -0.295779 \n",
1466 | " 0.087485 \n",
1467 | " \n",
1468 | " \n",
1469 | " 10 \n",
1470 | " price \n",
1471 | " depth \n",
1472 | " -0.010647 \n",
1473 | " 0.000113 \n",
1474 | " \n",
1475 | " \n",
1476 | " 11 \n",
1477 | " x \n",
1478 | " depth \n",
1479 | " -0.025289 \n",
1480 | " 0.000640 \n",
1481 | " \n",
1482 | " \n",
1483 | "
\n",
1484 | "
"
1485 | ],
1486 | "text/plain": [
1487 | " v1 v2 R R2\n",
1488 | "1 depth carat 0.028224 0.000797\n",
1489 | "2 table carat 0.181618 0.032985\n",
1490 | "3 price carat 0.921591 0.849331\n",
1491 | "4 x carat 0.975094 0.950809\n",
1492 | "5 y carat 0.951722 0.905775\n",
1493 | "6 z carat 0.953387 0.908947\n",
1494 | "7 carat depth 0.028224 0.000797\n",
1495 | "9 table depth -0.295779 0.087485\n",
1496 | "10 price depth -0.010647 0.000113\n",
1497 | "11 x depth -0.025289 0.000640"
1498 | ]
1499 | },
1500 | "execution_count": 23,
1501 | "metadata": {},
1502 | "output_type": "execute_result"
1503 | }
1504 | ],
1505 | "source": [
1506 | "res=corr_pair(diamonds)\n",
1507 | "\n",
1508 | "res.head(10)"
1509 | ]
1510 | },
1511 | {
1512 | "cell_type": "markdown",
1513 | "metadata": {},
1514 | "source": [
1515 | "If `price` is the target..."
1516 | ]
1517 | },
1518 | {
1519 | "cell_type": "markdown",
1520 | "metadata": {},
1521 | "source": [
1522 | "Feature analysis for predictive modeling:"
1523 | ]
1524 | },
1525 | {
1526 | "cell_type": "code",
1527 | "execution_count": 24,
1528 | "metadata": {},
1529 | "outputs": [],
1530 | "source": [
1531 | "res_target_ordered=res[res['v2']=='price'].sort_values('R2', ascending=False)"
1532 | ]
1533 | },
1534 | {
1535 | "cell_type": "code",
1536 | "execution_count": null,
1537 | "metadata": {},
1538 | "outputs": [],
1539 | "source": [
1540 | "\n",
1541 | "\n",
1542 | "\n",
1543 | "\n"
1544 | ]
1545 | },
1546 | {
1547 | "cell_type": "markdown",
1548 | "metadata": {},
1549 | "source": [
1550 | "Get top 3 most correlated features:"
1551 | ]
1552 | },
1553 | {
1554 | "cell_type": "code",
1555 | "execution_count": 25,
1556 | "metadata": {},
1557 | "outputs": [
1558 | {
1559 | "data": {
1560 | "text/plain": [
1561 | "21 carat\n",
1562 | "25 x\n",
1563 | "26 y\n",
1564 | "Name: v1, dtype: object"
1565 | ]
1566 | },
1567 | "execution_count": 25,
1568 | "metadata": {},
1569 | "output_type": "execute_result"
1570 | }
1571 | ],
1572 | "source": [
1573 | "# Top 3:\n",
1574 | "top_vars=res_target_ordered[0:3]['v1']\n",
1575 | "top_vars"
1576 | ]
1577 | },
1578 | {
1579 | "cell_type": "code",
1580 | "execution_count": null,
1581 | "metadata": {},
1582 | "outputs": [],
1583 | "source": [
1584 | "\n",
1585 | "\n",
1586 | "\n",
1587 | "\n"
1588 | ]
1589 | },
1590 | {
1591 | "cell_type": "markdown",
1592 | "metadata": {},
1593 | "source": [
1594 | "Conversely: delete less relevant features (threshold R2 < 0.05)"
1595 | ]
1596 | },
1597 | {
1598 | "cell_type": "code",
1599 | "execution_count": 26,
1600 | "metadata": {},
1601 | "outputs": [
1602 | {
1603 | "data": {
1604 | "text/plain": [
1605 | "23 table\n",
1606 | "22 depth\n",
1607 | "Name: v1, dtype: object"
1608 | ]
1609 | },
1610 | "execution_count": 26,
1611 | "metadata": {},
1612 | "output_type": "execute_result"
1613 | }
1614 | ],
1615 | "source": [
1616 | "res_target_ordered[res_target_ordered['R2']<0.05]['v1']"
1617 | ]
1618 | },
1619 | {
1620 | "cell_type": "code",
1621 | "execution_count": null,
1622 | "metadata": {},
1623 | "outputs": [],
1624 | "source": [
1625 | "\n",
1626 | "\n",
1627 | "\n",
1628 | "\n",
1629 | "\n",
1630 | "\n",
1631 | "\n",
1632 | "\n",
1633 | "\n"
1634 | ]
1635 | },
1636 | {
1637 | "cell_type": "markdown",
1638 | "metadata": {},
1639 | "source": [
1640 | "### 1.5) Get numeric and categorical var names"
1641 | ]
1642 | },
1643 | {
1644 | "cell_type": "markdown",
1645 | "metadata": {},
1646 | "source": [
1647 | "Definitely, this is not fancy but useful internally and used with sklearn pipelines."
1648 | ]
1649 | },
1650 | {
1651 | "cell_type": "code",
1652 | "execution_count": 27,
1653 | "metadata": {},
1654 | "outputs": [],
1655 | "source": [
1656 | "from funpymodeling.exploratory import cat_vars, num_vars"
1657 | ]
1658 | },
1659 | {
1660 | "cell_type": "code",
1661 | "execution_count": 28,
1662 | "metadata": {},
1663 | "outputs": [
1664 | {
1665 | "data": {
1666 | "text/html": [
1667 | "\n",
1668 | "\n",
1681 | "
\n",
1682 | " \n",
1683 | " \n",
1684 | " \n",
1685 | " variable \n",
1686 | " q_nan \n",
1687 | " p_nan \n",
1688 | " q_zeros \n",
1689 | " p_zeros \n",
1690 | " unique \n",
1691 | " type \n",
1692 | " \n",
1693 | " \n",
1694 | " \n",
1695 | " \n",
1696 | " 0 \n",
1697 | " total_bill \n",
1698 | " 0 \n",
1699 | " 0.0 \n",
1700 | " 0 \n",
1701 | " 0.0 \n",
1702 | " 229 \n",
1703 | " float64 \n",
1704 | " \n",
1705 | " \n",
1706 | " 1 \n",
1707 | " tip \n",
1708 | " 0 \n",
1709 | " 0.0 \n",
1710 | " 0 \n",
1711 | " 0.0 \n",
1712 | " 123 \n",
1713 | " float64 \n",
1714 | " \n",
1715 | " \n",
1716 | " 2 \n",
1717 | " sex \n",
1718 | " 0 \n",
1719 | " 0.0 \n",
1720 | " 0 \n",
1721 | " 0.0 \n",
1722 | " 2 \n",
1723 | " category \n",
1724 | " \n",
1725 | " \n",
1726 | " 3 \n",
1727 | " smoker \n",
1728 | " 0 \n",
1729 | " 0.0 \n",
1730 | " 0 \n",
1731 | " 0.0 \n",
1732 | " 2 \n",
1733 | " category \n",
1734 | " \n",
1735 | " \n",
1736 | " 4 \n",
1737 | " day \n",
1738 | " 0 \n",
1739 | " 0.0 \n",
1740 | " 0 \n",
1741 | " 0.0 \n",
1742 | " 4 \n",
1743 | " category \n",
1744 | " \n",
1745 | " \n",
1746 | " 5 \n",
1747 | " time \n",
1748 | " 0 \n",
1749 | " 0.0 \n",
1750 | " 0 \n",
1751 | " 0.0 \n",
1752 | " 2 \n",
1753 | " category \n",
1754 | " \n",
1755 | " \n",
1756 | " 6 \n",
1757 | " size \n",
1758 | " 0 \n",
1759 | " 0.0 \n",
1760 | " 0 \n",
1761 | " 0.0 \n",
1762 | " 6 \n",
1763 | " int64 \n",
1764 | " \n",
1765 | " \n",
1766 | "
\n",
1767 | "
"
1768 | ],
1769 | "text/plain": [
1770 | " variable q_nan p_nan q_zeros p_zeros unique type\n",
1771 | "0 total_bill 0 0.0 0 0.0 229 float64\n",
1772 | "1 tip 0 0.0 0 0.0 123 float64\n",
1773 | "2 sex 0 0.0 0 0.0 2 category\n",
1774 | "3 smoker 0 0.0 0 0.0 2 category\n",
1775 | "4 day 0 0.0 0 0.0 4 category\n",
1776 | "5 time 0 0.0 0 0.0 2 category\n",
1777 | "6 size 0 0.0 0 0.0 6 int64"
1778 | ]
1779 | },
1780 | "execution_count": 28,
1781 | "metadata": {},
1782 | "output_type": "execute_result"
1783 | }
1784 | ],
1785 | "source": [
1786 | "status(tips)"
1787 | ]
1788 | },
1789 | {
1790 | "cell_type": "markdown",
1791 | "metadata": {},
1792 | "source": [
1793 | "Retrieve categorical var names:"
1794 | ]
1795 | },
1796 | {
1797 | "cell_type": "code",
1798 | "execution_count": 29,
1799 | "metadata": {},
1800 | "outputs": [
1801 | {
1802 | "data": {
1803 | "text/plain": [
1804 | "Index(['sex', 'smoker', 'day', 'time'], dtype='object')"
1805 | ]
1806 | },
1807 | "execution_count": 29,
1808 | "metadata": {},
1809 | "output_type": "execute_result"
1810 | }
1811 | ],
1812 | "source": [
1813 | "cat_vars(tips)"
1814 | ]
1815 | },
1816 | {
1817 | "cell_type": "markdown",
1818 | "metadata": {},
1819 | "source": [
1820 | "Retrieve numerical var names:"
1821 | ]
1822 | },
1823 | {
1824 | "cell_type": "code",
1825 | "execution_count": 30,
1826 | "metadata": {},
1827 | "outputs": [
1828 | {
1829 | "data": {
1830 | "text/plain": [
1831 | "Index(['total_bill', 'tip', 'size'], dtype='object')"
1832 | ]
1833 | },
1834 | "execution_count": 30,
1835 | "metadata": {},
1836 | "output_type": "execute_result"
1837 | }
1838 | ],
1839 | "source": [
1840 | "num_vars(tips)"
1841 | ]
1842 | },
1843 | {
1844 | "cell_type": "code",
1845 | "execution_count": null,
1846 | "metadata": {},
1847 | "outputs": [],
1848 | "source": [
1849 | "\n",
1850 | "\n",
1851 | "\n",
1852 | "\n",
1853 | "\n",
1854 | "\n",
1855 | "\n",
1856 | "\n",
1857 | "\n",
1858 | "\n"
1859 | ]
1860 | },
1861 | {
1862 | "cell_type": "markdown",
1863 | "metadata": {},
1864 | "source": [
1865 | "## 2) Data Preparation"
1866 | ]
1867 | },
1868 | {
1869 | "cell_type": "markdown",
1870 | "metadata": {},
1871 | "source": [
1872 | "### 2.1) Convert \"almost-everything\" into a pandas dataframe"
1873 | ]
1874 | },
1875 | {
1876 | "cell_type": "code",
1877 | "execution_count": 31,
1878 | "metadata": {},
1879 | "outputs": [],
1880 | "source": [
1881 | "from funpymodeling.data_prep import todf\n",
1882 | "\n",
1883 | "import numpy as np"
1884 | ]
1885 | },
1886 | {
1887 | "cell_type": "markdown",
1888 | "metadata": {},
1889 | "source": [
1890 | "Note: Yes, under certain scenarios it is not convenient due to performance reasons. But in many scenarios we need/want to test or do a quick exploration.\n",
1891 | "\n",
1892 | "`todf` is used as the entry point in many functions of `funPyModeling`."
1893 | ]
1894 | },
1895 | {
1896 | "cell_type": "code",
1897 | "execution_count": 32,
1898 | "metadata": {},
1899 | "outputs": [
1900 | {
1901 | "data": {
1902 | "text/html": [
1903 | "\n",
1904 | "\n",
1917 | "
\n",
1918 | " \n",
1919 | " \n",
1920 | " \n",
1921 | " 0 \n",
1922 | " \n",
1923 | " \n",
1924 | " \n",
1925 | " \n",
1926 | " 0 \n",
1927 | " 11 \n",
1928 | " \n",
1929 | " \n",
1930 | " 1 \n",
1931 | " 12 \n",
1932 | " \n",
1933 | " \n",
1934 | " 2 \n",
1935 | " 5 \n",
1936 | " \n",
1937 | " \n",
1938 | " 3 \n",
1939 | " 2 \n",
1940 | " \n",
1941 | " \n",
1942 | "
\n",
1943 | "
"
1944 | ],
1945 | "text/plain": [
1946 | " 0\n",
1947 | "0 11\n",
1948 | "1 12\n",
1949 | "2 5\n",
1950 | "3 2"
1951 | ]
1952 | },
1953 | "execution_count": 32,
1954 | "metadata": {},
1955 | "output_type": "execute_result"
1956 | }
1957 | ],
1958 | "source": [
1959 | "# 1D List\n",
1960 | "list_1d = [11, 12, 5, 2] \n",
1961 | "todf(list_1d)"
1962 | ]
1963 | },
1964 | {
1965 | "cell_type": "code",
1966 | "execution_count": 33,
1967 | "metadata": {},
1968 | "outputs": [
1969 | {
1970 | "data": {
1971 | "text/html": [
1972 | "\n",
1973 | "\n",
1986 | "
\n",
1987 | " \n",
1988 | " \n",
1989 | " \n",
1990 | " 0 \n",
1991 | " 1 \n",
1992 | " 2 \n",
1993 | " 3 \n",
1994 | " \n",
1995 | " \n",
1996 | " \n",
1997 | " \n",
1998 | " 0 \n",
1999 | " 11 \n",
2000 | " 12 \n",
2001 | " 5 \n",
2002 | " 2 \n",
2003 | " \n",
2004 | " \n",
2005 | " 1 \n",
2006 | " 15 \n",
2007 | " 24 \n",
2008 | " 6 \n",
2009 | " 10 \n",
2010 | " \n",
2011 | " \n",
2012 | " 2 \n",
2013 | " 10 \n",
2014 | " 8 \n",
2015 | " 12 \n",
2016 | " 5 \n",
2017 | " \n",
2018 | " \n",
2019 | " 3 \n",
2020 | " 12 \n",
2021 | " 15 \n",
2022 | " 8 \n",
2023 | " 6 \n",
2024 | " \n",
2025 | " \n",
2026 | "
\n",
2027 | "
"
2028 | ],
2029 | "text/plain": [
2030 | " 0 1 2 3\n",
2031 | "0 11 12 5 2\n",
2032 | "1 15 24 6 10\n",
2033 | "2 10 8 12 5\n",
2034 | "3 12 15 8 6"
2035 | ]
2036 | },
2037 | "execution_count": 33,
2038 | "metadata": {},
2039 | "output_type": "execute_result"
2040 | }
2041 | ],
2042 | "source": [
2043 | "# 2D List\n",
2044 | "list_2d = [[11, 12, 5, 2], [15,24, 6,10], [10, 8, 12, 5], [12,15,8,6]]\n",
2045 | "todf(list_2d)"
2046 | ]
2047 | },
2048 | {
2049 | "cell_type": "code",
2050 | "execution_count": 34,
2051 | "metadata": {},
2052 | "outputs": [
2053 | {
2054 | "data": {
2055 | "text/html": [
2056 | "\n",
2057 | "\n",
2070 | "
\n",
2071 | " \n",
2072 | " \n",
2073 | " \n",
2074 | " 0 \n",
2075 | " \n",
2076 | " \n",
2077 | " \n",
2078 | " \n",
2079 | " 0 \n",
2080 | " 11 \n",
2081 | " \n",
2082 | " \n",
2083 | " 1 \n",
2084 | " 12 \n",
2085 | " \n",
2086 | " \n",
2087 | " 2 \n",
2088 | " 5 \n",
2089 | " \n",
2090 | " \n",
2091 | " 3 \n",
2092 | " 2 \n",
2093 | " \n",
2094 | " \n",
2095 | "
\n",
2096 | "
"
2097 | ],
2098 | "text/plain": [
2099 | " 0\n",
2100 | "0 11\n",
2101 | "1 12\n",
2102 | "2 5\n",
2103 | "3 2"
2104 | ]
2105 | },
2106 | "execution_count": 34,
2107 | "metadata": {},
2108 | "output_type": "execute_result"
2109 | }
2110 | ],
2111 | "source": [
2112 | "# 1D numpy array\n",
2113 | "array_1d = np.array(list_1d)\n",
2114 | "todf(array_1d)"
2115 | ]
2116 | },
2117 | {
2118 | "cell_type": "code",
2119 | "execution_count": 35,
2120 | "metadata": {},
2121 | "outputs": [
2122 | {
2123 | "data": {
2124 | "text/html": [
2125 | "\n",
2126 | "\n",
2139 | "
\n",
2140 | " \n",
2141 | " \n",
2142 | " \n",
2143 | " 0 \n",
2144 | " 1 \n",
2145 | " 2 \n",
2146 | " 3 \n",
2147 | " \n",
2148 | " \n",
2149 | " \n",
2150 | " \n",
2151 | " 0 \n",
2152 | " 11 \n",
2153 | " 12 \n",
2154 | " 5 \n",
2155 | " 2 \n",
2156 | " \n",
2157 | " \n",
2158 | " 1 \n",
2159 | " 15 \n",
2160 | " 24 \n",
2161 | " 6 \n",
2162 | " 10 \n",
2163 | " \n",
2164 | " \n",
2165 | " 2 \n",
2166 | " 10 \n",
2167 | " 8 \n",
2168 | " 12 \n",
2169 | " 5 \n",
2170 | " \n",
2171 | " \n",
2172 | " 3 \n",
2173 | " 12 \n",
2174 | " 15 \n",
2175 | " 8 \n",
2176 | " 6 \n",
2177 | " \n",
2178 | " \n",
2179 | "
\n",
2180 | "
"
2181 | ],
2182 | "text/plain": [
2183 | " 0 1 2 3\n",
2184 | "0 11 12 5 2\n",
2185 | "1 15 24 6 10\n",
2186 | "2 10 8 12 5\n",
2187 | "3 12 15 8 6"
2188 | ]
2189 | },
2190 | "execution_count": 35,
2191 | "metadata": {},
2192 | "output_type": "execute_result"
2193 | }
2194 | ],
2195 | "source": [
2196 | "# 2D numpy array\n",
2197 | "array_2d = np.array(list_2d)\n",
2198 | "todf(array_2d)"
2199 | ]
2200 | },
2201 | {
2202 | "cell_type": "code",
2203 | "execution_count": 36,
2204 | "metadata": {},
2205 | "outputs": [
2206 | {
2207 | "data": {
2208 | "text/html": [
2209 | "\n",
2210 | "\n",
2223 | "
\n",
2224 | " \n",
2225 | " \n",
2226 | " \n",
2227 | " v1 \n",
2228 | " v2 \n",
2229 | " \n",
2230 | " \n",
2231 | " \n",
2232 | " \n",
2233 | " 0 \n",
2234 | " 11 \n",
2235 | " 15 \n",
2236 | " \n",
2237 | " \n",
2238 | " 1 \n",
2239 | " 12 \n",
2240 | " 24 \n",
2241 | " \n",
2242 | " \n",
2243 | " 2 \n",
2244 | " 5 \n",
2245 | " 6 \n",
2246 | " \n",
2247 | " \n",
2248 | " 3 \n",
2249 | " 2 \n",
2250 | " 10 \n",
2251 | " \n",
2252 | " \n",
2253 | "
\n",
2254 | "
"
2255 | ],
2256 | "text/plain": [
2257 | " v1 v2\n",
2258 | "0 11 15\n",
2259 | "1 12 24\n",
2260 | "2 5 6\n",
2261 | "3 2 10"
2262 | ]
2263 | },
2264 | "execution_count": 36,
2265 | "metadata": {},
2266 | "output_type": "execute_result"
2267 | }
2268 | ],
2269 | "source": [
2270 | "# Data frame (in=out)\n",
2271 | "pd_df=pd.DataFrame({'v1':[11, 12, 5, 2], 'v2':[15,24, 6,10]}) #\n",
2272 | "todf(pd_df)"
2273 | ]
2274 | },
2275 | {
2276 | "cell_type": "code",
2277 | "execution_count": 37,
2278 | "metadata": {},
2279 | "outputs": [
2280 | {
2281 | "data": {
2282 | "text/html": [
2283 | "\n",
2284 | "\n",
2297 | "
\n",
2298 | " \n",
2299 | " \n",
2300 | " \n",
2301 | " v1 \n",
2302 | " \n",
2303 | " \n",
2304 | " \n",
2305 | " \n",
2306 | " 0 \n",
2307 | " 11 \n",
2308 | " \n",
2309 | " \n",
2310 | " 1 \n",
2311 | " 12 \n",
2312 | " \n",
2313 | " \n",
2314 | " 2 \n",
2315 | " 5 \n",
2316 | " \n",
2317 | " \n",
2318 | " 3 \n",
2319 | " 2 \n",
2320 | " \n",
2321 | " \n",
2322 | "
\n",
2323 | "
"
2324 | ],
2325 | "text/plain": [
2326 | " v1\n",
2327 | "0 11\n",
2328 | "1 12\n",
2329 | "2 5\n",
2330 | "3 2"
2331 | ]
2332 | },
2333 | "execution_count": 37,
2334 | "metadata": {},
2335 | "output_type": "execute_result"
2336 | }
2337 | ],
2338 | "source": [
2339 | "# Pandas series\n",
2340 | "todf(pd_df['v1'])"
2341 | ]
2342 | },
2343 | {
2344 | "cell_type": "markdown",
2345 | "metadata": {},
2346 | "source": [
2347 | "Raises an error for objects with more than 2 dimensions:"
2348 | ]
2349 | },
2350 | {
2351 | "cell_type": "code",
2352 | "execution_count": 41,
2353 | "metadata": {},
2354 | "outputs": [
2355 | {
2356 | "name": "stdout",
2357 | "output_type": "stream",
2358 | "text": [
2359 | "(1, 4, 4)\n"
2360 | ]
2361 | }
2362 | ],
2363 | "source": [
2364 | "list_3d = np.array([[[11, 12, 5, 2], [15,24, 6,10], [10, 8, 12, 5], [12,15,8,6]]]) # error\n",
2365 | "print(list_3d.shape)\n",
2366 | "# todf(list_3d) # <- error"
2367 | ]
2368 | },
2369 | {
2370 | "cell_type": "markdown",
2371 | "metadata": {},
2372 | "source": [
2373 | "It raises the error: `Exception: I live in flattland! (can't handle objects with more than 2 dimensions)`"
2374 | ]
2375 | },
2376 | {
2377 | "cell_type": "code",
2378 | "execution_count": null,
2379 | "metadata": {},
2380 | "outputs": [],
2381 | "source": [
2382 | "\n",
2383 | "\n",
2384 | "\n",
2385 | "\n",
2386 | "\n",
2387 | "\n",
2388 | "\n",
2389 | "\n",
2390 | "\n",
2391 | "\n",
2392 | "\n"
2393 | ]
2394 | },
2395 | {
2396 | "cell_type": "markdown",
2397 | "metadata": {},
2398 | "source": [
2399 | "## 3) Model validation: Clustering"
2400 | ]
2401 | },
2402 | {
2403 | "cell_type": "code",
2404 | "execution_count": 43,
2405 | "metadata": {},
2406 | "outputs": [],
2407 | "source": [
2408 | "from funpymodeling.model_validation import coord_plot"
2409 | ]
2410 | },
2411 | {
2412 | "cell_type": "code",
2413 | "execution_count": 44,
2414 | "metadata": {},
2415 | "outputs": [],
2416 | "source": [
2417 | "from sklearn.cluster import KMeans\n",
2418 | "\n",
2419 | "x = iris.drop('species', axis=1)\n",
2420 | "\n",
2421 | "mod_km=KMeans(n_clusters=3)\n",
2422 | "iris['cluster']=mod_km.fit_predict(x)\n"
2423 | ]
2424 | },
2425 | {
2426 | "cell_type": "code",
2427 | "execution_count": 45,
2428 | "metadata": {},
2429 | "outputs": [
2430 | {
2431 | "data": {
2432 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXkAAAEuCAYAAABriGJyAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy86wFpkAAAACXBIWXMAAAsTAAALEwEAmpwYAABWiUlEQVR4nO3dd1gU19vG8e+hVwV7AXvFhgjYa+z600RNj0nsijW29F5M7CV2TVHfRKPRFHuvUUGwi1hRsYuIdBb2vH8ARo3KArs7u3A+1+Wluzs7504Gnp09M/OMkFKiKIqi5E82WgdQFEVRTEcVeUVRlHxMFXlFUZR8TBV5RVGUfEwVeUVRlHxMFXlFUZR8zE6rgYUQskGDBloNr+RRaGgoavtZJ7XtrFtoaOgdKWVxQ5cXWp0nL4SQ6hx96yWEQG0/66S2nXUTQoRKKf0NXV5N1yiKouRjqsgriqLkY6rIK4qi5GOaHXhVFEXRik6nIyoqiuTkZK2jPJWTkxNeXl7Y29vnaT3ZFnkhxA9AV+CWlLL2E14XwAygM5AIvC2lDMtTKkVRFBOKiorC3d2dChUqkFHCLIuUkujoaKKioqhYsWKe1mXIdM1PQMdnvN4JqJr5ZyAwN0+JFEVRTCw5OZmiRYtaZIGHjDOgihYtapRvGtnuyUspdwshKjxjke7AkszzIQ8IITyEEKWllNfznE6xSMlpOq0jKLmUderk7du3NU6irfT0dNLS0rSOka309PQ8bytjzMmXBa489Dgq87lsi/y1a9coU6aMESIo5nIh9g5d136vdQwlF3Q6HYsXLwbgo48+0jiNtnr37s21a9c0zbBz506++OIL0tPTefnllwkKCvrPMvfu3WPWrFl5GsesB16FEAPJmNIBYMKECbRq1YqiRYuaM4aSB9NuhnE/NeMr5J/bNlHY1lHjRIohdDodu3fv5tatWwA0atRI40TacnR0xNXVVbPx09PT+eyzz1i5ciVlypShffv2dOvWjerVqz+ynKOj43+21YIFC3I0ljGK/FXA+6HHXpnP/YeUcgGwADKueC1UqBC7du1iyJAh1KxZ0whRFFPad/08oRc38kpVf6YA/zgl8F3TDlrHUrIRFxfHzJkzuXPnDn379mXBggX06dNH61iaCg8Pp1ixYpqNv3//fqpVq4a/f8aFq6+//jq7d++madOmjyx3+/bt/2yrvn375mgsYxT5v4BhQojlQEMg1tD5+PHjxzNjxgxmzZpFv379VD8NC5au1/NF8FrKunrwZaPuTAF+PRvC2zWbULNIKa3jKU9x9+5dpk+fzt27dwkKCqJOnTpaR7I4nx78m5N3jTt1U6tIGT5v+L+nvn716lW8vf/dN/by8uLgwYNGzZAl27NrhBC/AvuB6kKIKCFEPyHEYCHE4MxF1gMXgHPAQuC/E0tPUbhwYcaMGUOFChVYuHAhe/bsycV/gmIOq86HcfLudT7w74SzXcZ5u+72TnwZsk71QbFQN27cYOLEidy/f5+RI0eqAl9AGXJ2zavZvC6BobkN4OrqyqhRo5g/fz7Lli0jISGBDh06WOypTQVRgi6F70I34Ve8HN0q1n3w/Du+z/FZ8Fq2R0XwnHcNDRMqj4uMjGTmzJnY2toyZsyYR/YalUc9a4/bVMqWLcuVK/+erxIVFUXZsmVNMpZFtDVwcHAgKCiIwMBA1qxZw++//672Di3InOO7uJUUx6eBXR/58H2rZmMqFSrGlyHr0OnTNUyoPOz06dNMnToVJycnxo0bpwq8BQoICODs2bNcvHiR1NRUli9fTrdu3UwylkUUeQBbW1v69OlDq1at2LJlC0uWLCE9XRUOrV2Lv8f8E3voXrEeDUqUe+Q1extbPgrozLnY2yw7bZr5RCVnwsLCmDVrFkWLFmX8+PGUKFFC60jKE9jZ2fH999/ToUMHatasyUsvvUStWrVMM5ZJ1ppLNjY2vPLKK7i5ubF27VoSEhIYMGB
Anns3KLn3bdgmJJL3/Z980XM775o0LV2ZqUe28kJlXzwcXcycUMmyd+9eli1bRsWKFRk2bJimpwgq2evcuTOdO3c2+TgWsyefRQjB//73P15++WWOHj3KzJkzSUpK0jpWgXT49hVWnz/MgFrN8HLzfOIyQgg+CejCvZQkZh7dbuaESpZNmzaxdOlSfHx8GDVqlCrwygMWV+SztGnThr59+3Lu3DmmTp1KXFyc1pEKFCklXwSvpZiTG8Pqtn7msrWKluHlqg34MXw/F+/fMVNCBTK20++//87q1asJCAggKCgIR0d1gZryL4st8gANGzYkKCiI69evM2nSJO7evat1pAJjXeRxQm5dYpxfe9zssy8a4/zaY29jy9chG8yQToGMqyaXLl3K5s2badmyJX379sXOzqJmYBULYNFFHqBOnTqMGjWK+/fvM3HiRK5fV33PTC05Tcc3hzZSw7MUr1Q17FaSJV0KMaxuKzZePsn+GxdMnFDR6XQsXLiQffv20aVLF1599VVsbCz+11nRgFX8VFSpUoWxY8eSnp7OpEmTuHjxotaR8rUfwv/hcvxdPg3sgm0OCseAWs0p41qYL4LXoZd6EyYs2JKTk5k1axaHDx/mpZdeolu3buq6EuWprKLIQ8Zlv+PHj8fZ2Zlp06YRHh6udaR86U5SPLOObqetdw2al6mao/c629nzfoNOHI++yu/nDpsoYcEWHx/P1KlTOXv2LH369OG5557TOpJi4aymyAMUL16c8ePHU6xYMb7//nvCwtQNqIxtyuEtJKXp+DigS67e371SXXyLefNt2CYSdalGTlew3b17l0mTJnHt2jWGDBlS4DtJWru+fftSokQJatf+zw33jMqqijz82++mfPnyLFiwgL1792odKd+IiLnJ/50JpneNRlQuXDxX67ARNnwW2JWbifeZe2KXkRMWXFl9aO7du8eIESOoW7du9m9SLNrbb7/Nxo0bTT6O1RV5yOh3M3LkSHx8fFi6dKlZ/kcVBF+ErMPd3pHRvnmbAvAvWZ5uFesy9/huriXEGildwRUZGcmkSZNIS0tj7NixVKtWTetIihG0aNGCIkWKmHwcqz3fytHRkaCgIH766SfWrFlDQkICPXr0UAegcmlHVAS7rp7h08AueDrl/UKa9xt0ZNPlU0wM3cT0Fi8ZIWHBFBERwezZs3Fzc2PkyJGULFlS60j5zorQz4m6d8qo6/Ty8OHlBp8adZ25ZZV78lns7Ozo27cvrVq1YvPmzSxdulT1u8mFNH06X4aso4J7Ud6q0dgo6/R2L0J/n2asOh/G0TtRRllnQXPkyBFmzpz5oA+NKvBKbljtnnyWJ/W76d+/v+p3kwP/FxHMmXu3WNSmNw62xvuRGFa3FSvOHuKL4LWs6jRIfcvKgX379rF06VIqVKjA8OHDVZsCE7KUPW5Tseo9+SwP97s5cuQIs2bNUv1uDBSbksSUw1tpXKoSHcr5GHXd7g5OjPVrx8Gbkay/dMKo687PNm/ezJIlS6hZsybvvPOOKvBKnuSLIp8lq9/N2bNnVb8bA808toOYlEQ+Dexikj3tV6r6U92jJN8c2kBKeprR15+fSClZvXo1v//+Ow0aNGDo0KGqD00+9uqrr9K4cWMiIiLw8vJi8eLFJhknXxV5UP1uciLyfjQ/ntrHS1X9qF3UNHelsbOx5dPArlyKu8uPp/4xyRj5gV6vZ9myZWzatIkWLVrQv39/1Ycmn/v111+5fv06Op2OqKgo+vXrZ5Jx8l2Rh//2u7lx44bWkSzSN4c2YGdjyzi/DiYdp0XZqrTxqs6Mo9uITo436VjWKKsPzd69e+ncuTOvvfaa6kOjGE2+/UmqUqUKY8aMIT09nYkTJxIZGal1JIty8MZF1l86QVCdlpRyKWTy8T4O6EJimo6ph7eafCxrkpyc/ODq7RdffJHu3burA9SKUeXbIg/g7e3NuHHjcHJyYurUqarfTSa91PN58FpKuxRmUO3mZhmzqkc
J3qjekGURwZy5d9MsY1q6+Ph4pk2bxpkzZ3j77bdp27at1pGUfChfF3mAEiVKMH78eIoWLar63WRaff4Ix6Kv8p5/R5ztHMw27pj6bXG1d+DL4PVmG9NSxcTEMHnyZKKiohg8eDCNGxvn+gRFeVy+L/IAHh4ejB07lnLlyhX4fjeJulS+Dd1IvWJevFCpnlnHLuLkysh6bdhxNYKdV8+YdWxLktWHJiYmhpEjR1Kvnnm3g1KwFIgiDxn9bkaNGvWg382mTZu0jqSJ+Sd3cyPxPp8GdsVGmH/zv12zCeXdi/Jl8DrS9AXv6uTLly8zefJkdDodY8aMUX1oFJMrMEUe/u13ExAQ8OB8ZCml1rHM5npCLHOO76JLhToElqygSQZHWzs+9O9ExL2b/HomRJMMWomIiGDKlCk4ODgwbtw4ypUrp3UkRSNXrlyhdevW+Pj4UKtWLWbMmGGysQrcibhZ/W5cXFzYvHkzCQkJvP7669ja2modzeQmhW0mXa/nA/+OmuboVL4WDUtWZPLhLXSv5EshBydN85jDkSNHWLhwIcWLF2fkyJF4enpqHUnRkJ2dHVOmTMHPz4+4uDgaNGhAu3bt8PEx7lXnUMD25LPY2Njw6quv0qVLF/bt28fChQvR6XRaxzKp43eusvJcGP18mlHevaimWYQQfBrYhbvJicw6ukPTLObwzz//MG/ePLy9vRk7dqwq8AqlS5fGz88PAHd3d2rWrMnVq1dNMlaB25PPIoSgW7duuLm5sWLFCmbNmkVQUBBOTvlvr1JKyechayni5MLweq21jgNA3WJe9KpSn8Wn9tK7RkPKuZu+r7YWtmzZwqpVq6hZsyaDBw/Olz9f1m7FihVERRm3U6qXlxcvv/yyQctGRkZy+PBhGjZsaNQMWQrknvzD2rRpQ58+fR70u4mPz39XZG68fJIDNy4ytn47i5oaGe/XAVsbG745tEHrKEYnpWTNmjWsWrXqQR8aVeCVx8XHx9OzZ0+mT59OoUKmuSixwO7JP6xRo0a4uLiwYMECJk6cyKhRo8xyxxZzSElP46uQDVT3KMmr1QK0jvOI0q6FGVK7JVOPbCXkZiQBGh0MNja9Xs8vv/zCnj17aNGiBa+++qpqU2DBDN3jNjadTkfPnj15/fXX6dGjh8nGUT95merWrcvIkSOJjY3NV/1ufg7fz6W4aD4O7IKdjeUdXB5cuwWlXArxWfBa9FKvdZw8y+pDs2fPHjp16qT60ChPJKWkX79+1KxZk9GjR5t0LIN++oQQHYUQEUKIc0KI957wejkhxA4hxGEhxDEhRGfjRzW9qlWrMnbsWNLT05k0aZLV97u5m5zA9KPbaFW2Gq3KWub52C72DrzXoANH70Txx4WjWsfJk+TkZGbPnk1YWBi9evXi+eefV31olCfKuinM9u3b8fX1xdfXl/XrTXMluMjuPHEhhC1wBmgHRAEhwKtSylMPLbMAOCylnCuE8AHWSykrZLNeaannqN+6dYvp06cTHx/P0KFDqV69utaRcuWjA3+y9PRBtjw/kmoexr11nBDCaNcY6KWern/P5nZSPLt7jjFrqwVjiY+PZ9asWVy+fJk333zTotsUGHPbWavw8HBq1qypdYxsPSmnECJUSulv6DoM2ZMPBM5JKS9IKVOB5UD3x5aRQNZRg8LANUMDWKKH+93MnDmTw4cPax0px87eu8XS0wd5vXqg0Qu8sdkIGz4N7Mr1xFgWnNijdZwcU31oFEtmyIHXssCVhx5HAY+f6/MZsFkIMRxwBZ7YTk8IMRAYmPV4586dOYhqfo0aNWLXrl3Mnz+fgIAAKleurHUkg02+EYoDNjRKdDHZ/2djrzfApSQzj2yn3B0dnnbWcSbK/fv32bFjB6mpqbRs2ZKYmBiL/7kGy//dM7XChQtbxZ3jkpOT87ytjHV2zavAT1LKKUKIxsBSIURtKR89kialXAAsgIzpmlatWhlpeNNp1aoV8+bNIzg4GG9vb9q3b691pGz
tvnqWIxc38qF/J7rVaWmycYy9/Srcr0ObNVPZ6xTPlGbaXpVriMuXLzNz5kxsbW159913rapNgTX87plSeHg47u7uWsfIlpOTE/Xr18/TOgyZrrkKeD/02CvzuYf1A34DkFLuB5yAYnlKZiEcHR0ZOnQo/v7+/P7776xevdqi5zPT9Rm94su5FaGvT1Ot4+RIhUJF6ePTlN/OhnIi2jRX/xnLmTNnmDJlCvb29qoPjZWy5N9jMF4+Q4p8CFBVCFFRCOEAvAL89dgyl4HnAIQQNcko8reNktAC2NnZ0a9fP1q0aMGmTZtYtmwZer1lnu7369kQIu7d5IOATjjaWt9lECPqtsbT0YXPg9dZ7C/h0aNHmTFjBp6enowfP56SJS37mIfyX05OTkRHR1vsz5iUkujoaKNcQJdtFZBSpgkhhgGbAFvgBynlSSHEF8AhKeVfwBhgoRDiHTIOwr5tsafO5JKNjQ2vvfYabm5urF+/noSEBPr164e9vb3W0R6IS01mctgWAktWoEv52lrHyZXCjs6Mqd+WDw/8yebLp+hQvpbWkR6xf/9+lixZQrly5Rg+fDhubm5aR1JywcvLi6ioKG7fttx9UScnJ7y8vPK8nmxPoTQVSz6FMjtbt25l5cqV1KhRgyFDhljM5eoTDm1k9vGdrO06FN/i3tkunxemPA0vTZ9Ouz9moNOns/2Fd3CwkG8k+aUPjTqF0rqZ4hRK5TFt27alT58+nDlzhmnTpllEv5srcXdZdGovPSvXN3mBNzU7G1s+DuxCZFw0S04f0DoOUkr++OMPVq1ahZ+fn+pDo1gVVeRzqVGjRgwePJirV68yadIk7t69q2meCaEbEQjebWD5Z6UYonXZarQsU5VpR7YSk5ygWY6sPjQbNmygefPmDBgwwKKm6BQlO6rI50G9evUYMWIE9+7d07TfTeitS/x18RiD67SgjGthTTIYmxCCjwO7EKdLYdqRbZpkSEtLY9GiRezevZuOHTvy+uuvqz40itVRP7F5VK1aNcaMGUNaWhqTJk3i0qVLZh1fL/V8FryWks7uBNU23TnxWqjhWYrXqgWy5PQBzsea9wBZVh+a0NBQevXqxQsvvKD60ChWSRV5IyhXrhzjxo3D0dGRqVOnEhERYbax/7xwjMO3r/Bugw642Ftfz5fsjK3fDic7e74KMU3zpieJj49n+vTphIeH8+abb9KuXTuzja0oxqaKvJGULFmScePGUaRIEWbOnMmRI0dMPmZSmo4JoRuoU7Qsvar4mXw8LRRzdmN43dZsuRLO3mvnTD5eVh+aK1euMHjwYJo2ta4LyhTlcarIG5Gnpydjx47F29ubefPmsW/fPpOOt/DkHq4lxPJJYBdsRP7dlP18muLt5snnwWtJN+FFaDdv3mTSpEnExMQwYsQIfH19TTaWophL/q0MGnF1dWXUqFHUqFGDJUuWsGXLFpOMcysxju+P7aRjuVo0LlXJJGNYCic7ez7070R4zA1WnDtkkjEuX77MpEmTSElJYfTo0VbbXlpRHqeKvAk4OTkxdOhQGjRowKpVq1izZo3RLz6ZGLYJnT6dDwM6GXW9lqpLhToElCjPpLDNxOtSjLrux/vQlC9f3qjrVxQtqSJvIvb29vTv358WLVqwceNGo/a7OXX3GivOhtKnZmMqFsoXfeCyJYTgk8Cu3E6KZ/axnUZb79GjR5k5cyYeHh6MGzeOUqVKGW3dimIJLON68Xwqq9+Nq6srGzZsIDExkb59++bpYhopJZ8Hr8PD0ZkR9doYMa3lq1/cmxcq+bLg5B5erx6Il5tnntan+tAoBYHakzcxIQTPP/88L774ImFhYcyePZvk5ORcr2/rlXD2XT/PaN+2eDi6GDGpdXg/84reCaEb87SerVu38tNPP1GtWjXeeecdVeCVfEsVeTNp27Ytb7/9NhEREbnud5OansaXIeupUrg4b9R4/OZcBUMZNw8G127BnxeOEnrrco7fL6Xkzz//ZOXKlfj
5+TFs2DDVh0bJ11SRN6PGjRszePBgoqKimDx5MjExMTl6/9KIg1y4f4ePA7pgb2NropSWL6hOS0o4u/NF8NocHdDO6kOzfv16mjVrpvrQKAWCKvJmVq9ePUaOHElMTAwTJ07k5s2bBr0vJiWRqYe30rxMFdp4FezT+1ztHRnv157Q25f5++Ixg96TlpbG4sWLH/SheeONN1QfGqVAUD/lGsjqd6PT6Zg0aRKXL2c/7TDjyDbidMl8EtBV9VABXqzSgFpFSvNN6AaS03TPXDYlJYXZs2dz6NAhevbsqfrQKAWKummIhm7evMn06dNJTEwkKCjoqRfgXIi9TZs103i5qj/fNe1h5pRPZgk3nth3/Twvb1zIew06MKxu6ycuk5CQwKxZs4iMjKR3794Fvk3BoctrCSj/PyZs6o6zfSGcHdxxsS+Es707zg4Zf7s4FMp47cG/M15ztHPN11dWW4uc3jREnUKpoZIlSzJ+/HhmzJjBzJkzGTBgwBMvpf/60Aac7OwZ66caZT2saenKdCjnw6yjO3i5qj/Fnd0feT0mJoaZM2dy69YtBg0alOe73lu787cP8eP+dwBwsncjURdLdEIUSbr7JOnuo0t/9kVmAvHIh8HDHxJOD38g2Bd65MPB2T7zg8TBHXtbdZDb3NSevAWIj4/n+++/59KlS7z55ps0btz4wWv/7q12ZFjdVtqFfIwl7MnDv99yXqrqz8SHvuXcvHmTGTNmEB8fz9ChQwt8m4K7CVf5ZlM3nO3d+Krb7iduO116Ckm6OJJS75OkiyMx9X7mB8BDz+nuk5Qa9+D5R5bRxSHlsy/4s7NxzCz+T/sGkfXh8ZTn7NywKcAnHUDO9+RVkbcQycnJzJs3j/DwcHr16kW7du1I1+vp/PcsYlOT2PnCGJzsLOdMEEsp8gCfHfybH8L/YWO3EfgUKc2VK1eYMWMGUkqGDx9OhQoVtI6oqZS0RCZt6cnthCu8124NZTyqmWTbSSlJSUvILPyZHwSPfTgk6u6T/NCHQ2JqxuOsD4zU9KRsx3Gyc8PZwf2JU0rO9oUefGt4/NtG1jcMe1snqz4mo4q8FdPpdPz444+EhobSsWNHkn3KMu6f1cxp+SrdKtXTOt4jLKnIx6Qk0mzVJOoWK8vnFVswe/ZsnJ2dGTVqVIFvU6CXehbuG8rhKxsY2vIH6pRpY1Hb7nHpel3mN4esD4b7GX9nPvefbxCp/36LyPpw0cu0Z45hI+ye8OGQNaX036moR45TZL5ua6PdTLcq8lZOr9fz66+/snv3bm6VcUPvV4k/ugZZ3J6HpRWKxaf2MXvj7zQ6n0zxYsUZOXIkRYoU0TqW5v4+Pp21J6bR0/cD2tccBFjetjMmKSWp6UmZHxKxj0w1PfxBkJQa+8Tpp6TUOJLTsr9Q0cHW+QkfEoVxdnDPOD7xxIPa/x6rcLRzzfXvtDrwauWy+t0cj7sJhyOo5KkjLS1NXbSTjRqxAv8zCcS7O/L1mNF4Fsof97rNi7Ar61l7YhqNKvSkXY2BWscxCyEEjnYuONq54EHJXK1Dr08nKS3+3w+C1PuPfVjcf2xKKo74lBhux19+sEyaPjWbnDaZ3xge/gZR6BnHKwo/eJxTqshboGsJsax0uUv7+pW5cDic2bNnM3jwYHX5/VNs27aN3377jRIVvFhaPJ4/ok7Sx6eJ1rE0dSXmJD/uH03FovV5I/Abi/smaMlsbGxxdSiMq0PudxR06cn/+XB4+FvDkw5gRydcefCeZF0cEuN821LTNRZo+K7lrL90gt09xnLp2CmWLl1K+fLlGTZsmMU00rKEr/xSSv7++2/WrVtH/fr16du3L29uX8KJu9fY23NsgWzgBnA/6TbfbO4GSN5v/xeFnUs88rolbDvl2fRST4ou/pFvElnHKBpX6pmj6Rp1ZYOFOXz7CmsuHGFgreaUdfOgSZMmDBo0iCtXruSq301+lXXsYt26dTRt2pSBAwfi4ODAJ4GdiU1JYubR7Vp
H1IQuPYV5ewcRn3KXoOYL/1PgFetgI2xwdihEEdeyeHnWpGqJQOqVbUujijm/GFIVeQuS0St+LcWd3Rj60Dnxvr6+jBgxgpiYGCZNmmRwv5v8Ki0tjR9++IFdu3bRvn17evfu/aAPjU+RMrxSzZ8fw/dzIfaOxknNS0rJLyEfcv5OKG83mkK5InW0jqRYAFXkLcjayOMcunWJcX7tcbN3fOS16tWrM2bMGFJTUw3ud5MfpaSkMGfOHEJCQujRowc9e/b8z3zzuPrtcbCx5ZtDGzRKqY1tEYv55+JKutQagX+5rlrHUSyEKvIWIjlNxzeHNlDTsxQvV3nydFu5cuUYN24c9vb2TJkyhTNnzpg5pbYSEhKYPn06p06donfv3nTo0OGJy5VwcWdY3dZsvHySf66fN3NKbZy8votVR77G16sDXeu8o3UcxYKoIm8hFp/ax5X4GD4N7IrtM1rgZvW78fT0ZMaMGRw9etSMKbVz7949Jk+ezOXLlxk0aBDNmjV75vL9azWjrKsHXwSvQ5/NpfbW7sb98yzcN4yyhavTp9E01URMeYRBPw1CiI5CiAghxDkhxHtPWeYlIcQpIcRJIcQvxo2Zv91JimfWsR20865JszJVsl3e09OTsWPH4uXlxbx589i/f78ZUmrn1q1bTJo0iejoaIYPH25QozFnO3ve9+/IibvXWHUuzAwptZGQGsvs3f2ws7EnqMUinOxdtY6kWJhsi7wQwhaYDXQCfIBXhRA+jy1TFXgfaCqlrAWMMn7U/Gvy4S0kp+n4KKCzwe9xc3PjnXfeoXr16vz0009s3brVhAm1c+XKFSZNmkRSUhKjR4+mRo0aBr+3e8V61C/uzXehm0jUPfviFGuUrk9j4b6hRCdEMbj5fIq6emkdSbFAhuzJBwLnpJQXpJSpwHKg+2PLDABmSyljAKSUt4wbM/86HXODX84E82aNRlQuXDxH73VycmLo0KH4+fmxcuVK/vjjj3x1/vPZs2eZMmUKtra2jB8/PseNxoQQfBbYlZtJccw5scs0ITW06vDXhN/Yw2v+X1GleIDWcRQLZUiRLwtceehxVOZzD6sGVBNC7BNCHBBCdDRWwPxMSsmXwetwt3finfptc7UOe3t7BgwYQLNmzdiwYQO//PILer31z0EfP36cGTNmUKhQIcaPH5/rRmMNSpSnW8W6zDu+m2sJsUZOqZ2955ez/cwPtKnWl2aVX9E6jmLBjNXWwA6oCrQCvIDdQog6Usp7Dy8khBgIPGiisXPnTiMNb52OJt5m182zvFGkBkf3B+dpXV5eXtSsWZPdu3cTGRlJo0aNsLU1bd9tU22/yMhIDhw4gKenJ02aNOHYMcPu4/o0rXUebNCnM3r9zwwuXtdIKbVzO/UM26O/pZRDLYrfb5ar7VDQf/cKkmzbGgghGgOfSSk7ZD5+H0BKOeGhZeYBB6WUP2Y+3ga8J6UMecZ6C3RbA50+nfZ/zCBN6tn2/CgcbI3zebtlyxZWrVqFj48PgwYNMlm/G1NdGr99+3ZWrFhB9erVCQoKMlr+CYc2Mvv4TtZ2HYpvcW+jrFML0QlRTNjUDReHwrzb/o9c9VdRbQ2sW067UBoyXRMCVBVCVBRCOACvAH89tswfZOzFI4QoRsb0zQVDQxRE/xcRzNnYW3zk38loBR6gXbt2vPnmm4SHhzN9+nQSEhKMtm5TklLy119/sWLFCnx9fRk+fLhRP6CG1W1FMSc3vghZZ7UFLlmXwJzd/UnT6whqsShPDbSUgiPbIi+lTAOGAZuAcOA3KeVJIcQXQohumYttAqKFEKeAHcA4KWW0qUJbu9iUJKYc3kKTUpVoX84n+zfkUNOmTRk8eLDV9LvR6/UsX778kT40xm6t7O7gxFi/dgTfjGTdpRNGXbc56KWeHw+8w9XYCAY0/Z5ShSprHUmxEqoLpQa+DF7HgpN72dhtOLWKljHZOBEREcyZMwdXV1dGjhxJyZK566/9JMb6yp+WlsZPP/1
ESEgI7du3p0ePHiZri5uu19Phr5kk6lLZ0WM0jkb8BmVqfx2bwrqTM3mx/se0rdE/T+tS0zXWzRTTNYoRRd6P5ofwf3ipagOTFnjI6HczevRoUlJSLLLfTWpqKnPnzn1mHxpjsrWx4ZOALlyOv8sPp/aZbBxjO3R5LetOzqRJxRd5rno/reMoVkYVeTP7+tB67G1sGe/X3izjlS9f3iL73WT1oTl58uQz+9AYW4uyVXnOqwYzj24nOjn727xp7fLd4/x0YAyVizXgtYCv1c0/lBxTRd6MDty4wIZLJxlapyUlXXJ+G6/cKlWqFOPGjcPDw4OZM2dq3u8mNjaWKVOmcOnSJQYOHJhtHxpj+yigM4lpOqYctuyrhGOTbjFnzwDcHIswuNl87G0ds3+TojxGFXkz0Us9XwSvo7RLYQbWbm728YsUKcK4ceMoW7aspv1ubt++zcSJE7lz5w7Dhg3Dz8/P7BmqepSgd42GLIs4SESMZfbm16UnM3fPQBJS7jG0xSIKOefsamhFyaKKvJmsPn+YY9FXed+/I852DppkyOp3U61aNU363URFRTFx4kSSkpJ45513qFmzplnHf9gY37a42zvyZcg6zTI8jZSSZcEfcDH6MH0aT8Xbs5bWkRQrpoq8GSTqUpkQuol6xbx4vlI9TbM4OTk92INeuXIlf/75p1nOtDh37hyTJ0/G1taWcePGUbFiRZOP+SyeTq6MrPccO6+eYUdUhKZZHrfl9AIORP5O19rv4OdteNM6RXkSVeTNYN6J3dxMvM9ngV0totf3w/1u1q9fz6+//mrSfjfHjx9n+vTpD/rQlC5d2mRj5cTbNRtTwb0oX4asI02frnUcAI5f287qIxPw8+5Ml9ojtI6j5APaV5x87npCLHNP7KJrhToElKygdZwHbGxseOONN+jQoQO7du1i8eLFpKWlGX2c4OBg5syZQ+nSpRk3bhxFihQx+hi55WBrx4cBnThz7xa/nnlqBw6zuR57lkX/jMDLoyZvN5piETsEivWznqtBrNTEsE2k6/V84N9J6yj/IYSgR48euLq6snr1ahITExk8eDCOjsY5i2PHjh0sX76catWqERQUhLOzs1HWa0wdy9WiUamKTD68he6VfCnkYJpeP9lJSLnH7N39sLdxJKjFIhztXDTJoeQ/alfBhI7diWLluTD612pGOXfL2YN9XIcOHR70u5k2bVqe+91IKfn7779Zvnw5vr6+jBgxwiILPGR80H0a0JW7yYnMOrpDkwzpeh0L9gURk3idIc3nU8T18U7eipJ7qsibiJSSz4PXUdTJleF1W2sdJ1tNmzZl0KBBD/rd3Lt3L1fryepDs3btWpo0aWKSPjTGVqdYWV6s4sfiU3u5FGf+lku/hX3J6Zv7eD3gGyoXN/hqdUUxiCryJrLh0kkO3rzIuPrtcddoCiCn6tevz4gRI4iOjmbixIncupWzG3ylp6fz448/snPnzgfdME3d095YxjfogK2NDRMObTTruLvP/cLOsz/Ttnp/mlR60axjKwWDKvImkJKexteH1lPdoySvVLOuPbPH+91cuXIl+zeR0Ydmzpw5BAcH88ILL5i8D42xlXIpRFCdlqyNPE7wzUizjHnm1gF+PfQxtUq3pKfvB2YZUyl4VJE3gZ/C/+FS3F0+CeyCnY117Mk+rEKFCowbNw5bW1smT57M2bNnn7l8YmLigz40b7zxBh07drSqAp9lUK0WlHIpxOfBa9FL095C8U78ZebtGUxxt/L0bzILGyv8OVGsgyryRhadHM+Mo9tpXbY6LctW0zpOrpUqVYrx48fj4eHBjBkznnoLvof70AwYMIDmzc3fssFYXOwdeK9BR47eiWLNBdP190nWxTN7d3+kTGdoy8W4qJt/KCakiryRTT28jQRdKh8HWv+Viln9bsqUKcPcuXM5cODAI69n9aG5ffs2w4YNo0GDBholNZ4elX2pV8yLbw9tJCkt1ejr10s9P+x/hxv3zzGg6Rx
Kumt75a+S/6kib0Rn7t1kWcRB3qgeSDUP492gQ0tubm6MHj2aqlWr8uOPP7J9+/YHr1lKHxpjshEZPeevJ8Yy/8Qeo6//r2OTOXp1My/W/xif0tb7rUexHpoW+fUnZ+WrO9R8FbIeV3sHRtdvq3UUo3JycmL48OH4+vqyYsUKli5dCmRcNTt27FjN+9AYW8NSFelcvjazj+/kRuJ9o603OPJPNpyaTbPKr9C62ttGW6+iPIumRf7PY5P5+eA40tKN/7XY3HZdPcP2qAhG1G1DUSc3reMYnb29PQMHDqRp06bs3bsXgPHjx1OmjGnvbqWVD/w7ka7XMylsk1HWFxl9lCXB46hSPJBXG3xplQemFeukaZHvWnsU+y+uZOaut0hMjdUySp6k6dP5Ingd5d2L0MenidZxTMbW1pbevXszZMgQAIoWLapxItOpUKgofXya8tvZMI7fuZqndd1LvMncPQMo5FScwc3mYWerTatppWDStMj/r8479Gk0jXO3Q5i4pQd34i3rHqSGWn7mEBH3bvKBfyerujl0bggh8PX11TqGWYys1wZPRxe+CFmX62nF1LRk5u4dSJIujqDmi3B3yr8fjIpl0vzAa6OKPRjVehmxybf5dvPzXLxzWOtIORKXmsykw5tpWLICncvX1jqOYkSFHJwY69eO/TcusOnyqRy/X0rJ0uB3iYw+Qp9G0/DyzB8HpxXronmRB6hWohHvtluDo70rU7a/TNiV9VpHMtisYzuITk7gk8Cuap41H3qtWgDVPErwVch6UtNz1op5U/g8gi/9Qbc6Y6jv3dFECRXl2SyiyAOUKlSZ99r9QTnP2szfO4RN4fMs/syby3F3WXRyL70q+1GvmJfWcRQTsLOx5eOALkTGRfPzacPvi3v06lb+OPod/uX+R+daw02YUFGezWKKPIC7U1HeafML/uX+x+ojE/i/kA9I1+u0jvVUEw5txEbYML5BB62jKCbU2ivj6uXpR7YRk5x9G+ZrsWdY/M8IvIvU5q2Gk9Q3PEVTFlXkAextnejXZCadfIay5/wvfL+rL0mpxjtX2VgO3bzE35HHGFKnBWVc1WXp+d0nAV2I06Uw9ci2Zy4XnxLD7N39cLRzIaj5QhzsLLOPvlJwWFyRh4yrDp+vN543Aydy+uY/TNzai7sJeTuNzZj0Us9nwWsp6VKIIbVbah1HMYPqniV5vVogS04f4Ny9J7dgTtfrmL93MPcSbzKk+QI8XSzjXrZKwWaRRT5L08ovM6LVz8QkXmfC5u5cuntc60gA/HnhGEfuXOE9vw642KtznguKMfXb4WJnz1eHnnxiwIrQzzhz6wC9A7+lUjE/M6dTlCez6CIPULNUM8a3+x17W0cmb32RI1GbNc2TlJbKhNAN1Clalp5V6muaRTGvYs5uDK/Xhq1XTrPn2qPtl3edXcquc8toX2MQjSr20CihovyXxRd5gDKFq/Feuz8o41GdeXsGsi3iB83OvFlwYg/XEmL5NLALNsIq/vcpRtS3ZhPKuRXhi+B1pOszes5H3PyH5aGfUqdMG16o967GCRXlUVZTpQo5F2dMm+X4enXgt7DPWR76Ken6nJ23nFc3E+8z+/guOpWvRaNSlcw6tmIZnOzs+cC/I+ExN1hx7hC34y4xf+8QSrpXpF+TmermH4rFMajICyE6CiEihBDnhBDvPWO5nkIIKYQwyT3vHOycGdhsLu1qDGTn2Z+Zu2cAybrsT2kzlolhm9Hp0/nAv5PZxlQsT5cKdQgoUZ4poWuZtbsvEklQi8U427trHU1R/iPbIi+EsAVmA50AH+BVIYTPE5ZzB0YCB40d8mE2woZe9T/kNf+vOXl9F5O39iIm8YYphwTgZPQ1fjsbSt+aTahYqJjJx1MslxCCjwM64Z2+hZv3LzCo6VxKuFfQOpaiPJEhe/KBwDkp5QUpZSqwHOj+hOW+BL4Dko2Y76laVn2DoS1+4Hb8Zb7d3J0rMTnvLWIoKSWfB6/Fw9GZEfX
amGwcxXpEXltOSdtrnE73x83tP/s8imIxDGmZWBa48tDjKKDhwwsIIfwAbynlOiHEuKetSAgxEBiY9Xjnzp05CvskrTzeZffdaXy76QWaegRRxqlentf5uNCEW/xz6wJvFa3J4f0m/aJiVYyx/azRxcR9HIhdgJdTc7bEejNq/c8MK+GrdawcKajbriAS2Z2lIoToBXSUUvbPfNwbaCilHJb52AbYDrwtpYwUQuwExkopD2WzXmmsM2TuJd7k+919iLoXzisNPqdV1TeNsl6A1PQ0nvtjOrZCsOX5UdirA2tAxpSFpfcWMoWLdw4zedvLVCpWn1GtlzH1yA5mHN3On12G0KBEea3jGaSgbrv8QggRKqU0+LinIdM1VwHvhx57ZT6XxR2oDewUQkQCjYC/THXw9Uk8XEoy9rmV1CnThl8PfczKsC/R69ONsu4lpw9w8f4dPg7oogp8AReTeIO5ewfi4VyCQc3mYmtjT1CdlpR0dufz4Nz3nFcUUzKkyIcAVYUQFYUQDsArwF9ZL0opY6WUxaSUFaSUFYADQLfs9uSNzcnelSHNFtCmWh+2Rixi3t7BpKQl5mmdMSmJTDuyjRZlqtLGq7qRkirWKDUt+cHZXEEtFuPmWAQAV3tHxjfoQNjty/x18ZjGKRXlv7It8lLKNGAYsAkIB36TUp4UQnwhhOhm6oA5YWNjy8sNPuNlv884dm0rU7a9TGzSk/uMGGL6kW3E6ZL5OKCL6iRYgEkpWXJwHJfvHqdf4xmU9Xj0A79XZT9qFSnNN4c2kJRmuV1TlYLJoPPkpZTrpZTVpJSVpZRfZz73iZTyrycs28rce/GPa1O9D0HNF3Hj/jm+3fw8V+9F5Hgd52Nv83P4fl6tGkDNIqVMkFKxFhtOzSbk8l90rzuOel7t/vO6rY0NnwZ25WrCPRaf2qtBQkV5Oqu54jWn6pZ9jrHP/Ua61DFxa09OXd+do/d/HbIeJzt7xvr995daKTiORG3mz2OTCCzfnY4+QU9drknpynQo58Osozu4lRhnxoSK8mz5tsgDlCtSh/fa/UkxVy9m7XqbPed+Neh9+66dY/OVcIbXbU1xZ3UVY0F19d5pftg/kgpF6tE7cGK2U3Yf+ncmVZ/O5MNbzJRQUbKXr4s8QBHXMoxru4qapZqxLOQ9Vh/5Fr3UP3X5dL2ez0PW4e3mST+fpmZMqliSuORoZu/uh5O9G4ObL8DBzinb91QqXIy3azZm+dkQTt29boaUipK9fF/kAZzs3Rja4gdaVHmDTeFzWbRvGKlpT74w97dzoZy6e50P/DvhZGdv5qSKJUhLT2X+3iHEJt1mSPOFeLoYfkxmZL02FHJw5gt1SqViIQpEkQewtbHjNf+v6FX/I8KurGfq9le4n3znkWXidSlMDNuEf4nydK1QR6OkipaklPwa+glnbx/krYYTqVjUN0fv93B0YbTvc+y9fo5tUadNE1JRcqDAFHnIuNKvXY0BDGo2j6h74Xy3+Xmux/5784c5x3ZyOymeTwLVKZMF1Y6zP7H3/K90rBlEYIXnc7WO3jUaUblwcb4MWY/OSBflKUpuFagin6W+d0fGPvcbqenJTNzSg4ib/3A1/h7zT+7h+Uq++BUvp3VERQPhN/ayMuxL6pZtS/d6T23BlC17G1s+DujM+djbLD19wIgJFSXnsu1dY7KBjdi7JrfuxF/h+119uBl3EVG4B5tuO7K7x1jKunlomssa5Lf+JzfjLvLtpm54uJTi3XZrcLJ3y9P6pJS8umkxJ+5eY2/PsXg4uhgpad7lt21X0Jiid02+VczNm/Htfqe0py/p937jxZJ3KeNaWOtYipklpd5nzu5+CGFLUIvFeS7wkFFIPwnsQmxKEjOObjdCSkXJnQJd5AGc7QsRmtqK21Qn5u46ftg/El26WVriKxZAr09n4T/DuBV3iUHN5lLczXhTdT5FSvNKNX9+Ct/Phdg72b9BUUygwBf5vy8eI/TOVXr5fcUL9d4l+NK
fTN/xBvEpd7WOppjB6qMTOHl9F680+JzqJRsbff3j6rfHwcaWrw+tN/q6FcUQBbrIJ6fp+CZ0Az5FSvNyVX86+gQxoOlsIqOP8d3mF7gZd1HriIoJ7b+wii2nF9Kyam9aVn3DJGOUcHFnWN3WbLp8in+unzfJGIryLAW6yC86tY+o+Ht8GtAFW5uM/xX+5boy+rlfSdTd57vNz3P2VrDGKRVTOH/7EMtC3qd6ySa87PepScfqX6sZZV09+CJ4Hen6p19trSimUGCL/O2kOL4/toP23jVpWqbKI69VLtaA99r9gZtjUabveJ3gyD+0CamYxN2Ea8zbOxhPl9IMappx8w9Tcraz533/jpy4e41V58NMOpaiPK7AFvnJYVtITtPxUUDnJ75e3L0877ZbTaVifizeP5J1J2ao087ygZS0RObs6U9qWhJBLRbh6uhhlnG7V6yHX/FyTAzdRIIuxSxjKgoU0CIffvcGv54N4a2ajalUuPhTl3N19GBkq6U0qtCTv45P5eeDY0lLTzVjUsWYpJT8fGAsUTGn6N90FmUKVzPb2EIIPg3sws2kOOYc32W2cRWlwBV5KSVfhqzD3d6JUb7PZbu8na0Dbzeawv/qjGb/xVXM2NmbhNRYMyRVjG3dyZmEXlnHC77vUadMG7OP36BEebpXrMf8E3u4Fn/P7OMrBVOBK/LboyLYfe0s7/g+h6eBVyEKIehaeyR9Gk3jwp0wvtv8ArfjL5s4qWJMYVfW8/fxqTSq0IP2NQZpluN9/45IJN+GbdIsg1KwFKgir9On82XIOioVKsZbNXN+TnSjij0Y2XoZ8SnRfLf5ec7fCTVBSsXYrsSc5Mf9o6lYtD5vBE7QtPmcl5snA2o1Y/X5wxy5fUWzHErBUaCK/LLTBzkXe5uPAjpjb2Obq3VUK9GQd9utwdnenanbXiX08jojp1SM6X7yHebsHoCLQ2GGNJ+PvW32N/8wtWF1W1PMyY3Pg9eqg/mKyRWYIn8vJZGpR7bStHRl2nnXzNO6ShaqxLvt11C+SB0W7Ati46m56pfVAunSU5i3ZxBxKdEEtVhEYeeSWkcCwM3ekXF+7Qm5dYl1l05oHUfJ5wpMkZ95dDv3UpL4JMA4veLdHIvwTpv/I6B8N9Yc/ZZlIe+RrtcZIaliDFJKfj30EefvHOKthpMpX8SybgLzSlV/aniW4puQDSSnqZ8bxXQKRJG/eP8OP4bv5+WqDahVtIzR1mtv60TfxjPoXGs4e88vZ9auPiSl3jfa+pXc2xbxA/su/EbnWsMJKP8/reP8h62NDZ8GduFy/F1+DP9H6zhKPlYgivzXIRuwt7FlnF97o6/bRtjQve5Y3mw4iYib+5m4tSfRCVFGH0cx3Mnru1h15Ct8vTrwvzqjtY7zVM3LVKWtdw1mHt3OnaR4reMo+VS+L/L7b1xg4+WTDKvbipIuhUw2TtNKLzGy1RJiEm/w7ebniYw+arKxlKe7cf88C/cNo2zh6vRpNA0bYdk/4h/5dyYpTcfUI1u1jqLkU5b9G5BHeqnni+B1lHEtzIBazU0+Xo1STXm33WrsbR2ZvO0lDl/ZaPIxlX8lpMYyZ3d/bG3sGNJ8IU72rlpHylYVjxL0rtGIZREHiYi5qXUcJR/K10X+93OHOR59lfcbdMLZzrRNqLKULlyV99r/iZdHTebvHcyW0wvVmTdmkK5PY9G+YdxJuMLgZvMp5uatdSSDjfZ9Dnd7R74MUafjKsaXb4t8oi6Vb8M24VvMm+6V6pp17EJOxRjdZjn1vTux6vBX/Br6Men6NLNmKGh+P/INp27s5jX/L6laIlDrODni6eTKKN/n2Hn1DDuiIrSOo+Qz+bbIzz2xi5uJ9/kssKsm87IOdk4MaDqb9jUHs+vsUubs7keyTh1cM4V951ewLWIxbar1oVnlV7WOkytv1WhMBfeifBmyjjR9utZxlHwkXxb5awmxzD2+m/9VqIt/yfKa5bARNvT0fZ/XAyZw6sYeJm1
9kZjE65rlyY/O3Q7h/w59SM1SzelV/yOt4+Sag60dHwV05sy9W/xyJkTrOEo+YlCRF0J0FEJECCHOCSHee8Lro4UQp4QQx4QQ24QQ2lVWYGLoJiSSD/w7ahnjgRZVXmN4y5+4E3+Zbzd35/JddZWjMUQnRDFvzyCKunoxoOlsbG3stI6UJx3K+dCoVEUmh23hfqq6mbxiHNkWeSGELTAb6AT4AK8KIXweW+ww4C+lrAusAiYaO6ihjt6JYtX5MPr5NMXbvYhWMf7Dp3QLxrdbjRC2TN72IseubtM6klVL1iUwZ/cA0vQ6hrZYhKtDYa0j5ZkQgk8DuhKTksjMo9u1jqPkE4bsyQcC56SUF6SUqcByoPvDC0gpd0gpEzMfHgC8jBvTMFJKvgheSzEnN4bXba1FhGcq61Gd99v/QclClZmzpz87zvykdSSrpJd6fjowmquxp+nfZBalClXJ/k1Wok6xsrxYxY8fTu3jUly01nGUfMCQIl8WeLgnalTmc0/TD9iQl1C5tf7SCQ7ejGSsXzvcHbTvNvgkhZ1LMva536hbpi3LQz9lRejn6NWBthxZe2I6h6M20tP3A2qXaaV1HKMb36ADtjY2fHNIXWeh5J1RJzGFEG8A/kDLp7w+EBiY9Xjnzp1GG1sn9XwctQcvezdKX0tg53XjrdsUashXSXYVbD/zAxGXDtPEYwh2No5ax8oRY24/Q11OCmbfvdlUdG6G3fUq7Lxh/gzm0NmtPL9HHmfextXUcDL+tKMW207RhsjuQh0hRGPgMyllh8zH7wNIKSc8tlxbYBbQUkp5K9uBhZDGvEho3vHdfHVoPb+070eLslWNtl5T23HmZ1aEfYa3hw/DWv5gMe1wsyOEMPtFXpfvHmfi1l6U86zFO21+xd7Wuj4UcyIpLZUWv0+hhIs7f3cNMuppwFpsO8V4hBChUkp/Q5c35CcnBKgqhKgohHAAXgH+emzQ+sB8oJshBd7YopPjmXF0G228qltVgQdoXe0thjZfxM24C0zY/DxX753WOpJFik26xZw9A3BzLMLgZvPzdYEHcLZz4D3/jhy9E8Xq80e0jqNYsWyLvJQyDRgGbALCgd+klCeFEF8IIbplLjYJcANWCiGOCCH+esrqTGLq4a0kpun4OKCLOYc1mjpln2Nc21VImc7ELT05eX2X1pEsii49hXl7B5GQco+g5gsp5Fxc60hm8UKletQr5sW3oRtJSkvVOo5ipQz6DiilXC+lrCalrCyl/DrzuU+klH9l/rutlLKklNI380+3Z6/ReM7cu8myiGDeqN6Qqh4lzDWs0Xl71uK99n9SzK0c3+/qw+5z/6d1JIsgpeT/Qj7gwp0w3m40hXJFamsdyWxshA2fBnblRuJ95p3YrXUcxUpZ/RWvXwavx9XegTH122odJc88XUozru1KfEq34P9CPuD3w9+gl3qtY2lq6+mF7L+4iq61R9GgnHV+U8uLwJIV6FKhDnOO7+JGorohjZJzVl3kd149w46rEYys14YiTpbfVtYQTvZuBDVfRKuqb7L59HwW7gsiNa1gXv144toOfj86AT/vTnSpPVLrOJr5wL8j6Xo9E0M3aR1FsUJWW+TT9Ol8GbyO8u5FebtmE63jGJWtjR2vNPiCF+t/wuErG5m6/RXuJ93WOpZZXY89y8J/huNVuAZvN5pq8Tf/MKXy7kXp69OUlefCOH7nqtZxFCtjtb85v54JIeLeTT7074SjrXX3LHkSIQRta/RjcPP5RN0L59stz3Mt9ozWscwiIeUec3b3x97GkaAWi3C0c9E6kuZG1GuDp6MLn4esVac/KjlilUX+fmoykw9voWHJinQqX0vrOCbl69WBsc+tRJeewsQtPQm/sVfrSCaVrk9j4b6hRCdeZXDzeRRxfdbF1QVHIQcnxvq148CNi2y6fErrOIoVscoiP+voDu4mJ/JpYBeEEFrHMbkKRevyXvs/8HQpzcydb7Hv/AqtI5nMysNfEn5zL68HfEO
V4gFax7Eor1ULoJpHCb4KWU9quroJjWIYqyvyl+PusvjUXnpVqU/dYpr0QdNEUVcvxrddRfWSjVkSPJ4/jk7Md2fe7Dn3KzvO/MRz1fvRtNJLWsexOHY2tnwc0IXIuGh+Pr1f6ziKlbC6Iv/NoQ3Y2tgw3q+D1lHMztmhEMNb/kizyq+y4dRsfvhnJLr0/HHmzZlbB/nl0Ef4lGpJT98PtI5jsVp7VadV2WpMP7KNmOQEreMoVsCqinzIzUjWRh5nSO2WlHa1/v7huWFrY88bARPoUe99Qi7/xbTtrxGfclfrWHlyJ/4K8/cOprhbeQY0nWX1N/8wtY8DuhCvS2XqEXVPAiV7VlPk9VLPZ8FrKeVSiMG1W2gdR1NCCDr4DGZg0zlcjjnBt5uf5+b9C1rHypVkXTxzdvdHr09jaItFuOSDm3+YWnXPkrxePZAlpw9w7p7ZW0UpVsZqivwfF45y9E4U7zXogIu9g9ZxLEKDcl0Y3WY5ybp4vt3yPGduHdQ6Uo7opZ4f9r/DtftnGNB0NiULVdI6ktUYU78tLnb2fHVovdZRFAtnFUU+KS2VCYc2UrdoWXpUrq91HItSqZgf77b/g0JOxZi+43UOXFytdSSD/XV8CkevbubF+h/jU7pgfzvLqaJOboyo14atV06z59pZreMoFswqivz8E3u4nhjLp4FdC/SVj09T3K0c49utoUoxf3488A5/H59u8RfMhFz6iw0nv6dppZdpU62P1nGsUl+fppRzK8LnwetI1+evM60U47H4inkj8T6zj++kc/naNCxVUes4FsvVoTAjWi2hccUXWXtiGj8dGI0uPUXrWE8UGX2Mnw+OpUrxAF7z/6pAXOtgCo62dnwQ0InTMTdYfvaQ1nEUC2XxRX5S2CbS9Xo+8O+kdRSLZ2frwFsNJ9G97lgORK5m5s43SUi5p3WsR8Qm3WTungG4OxZjULN52Nmq4yt50aV8bQJLVmBS2GbiUvPH6bSKcVl0kT8RfZXfzobRx6cpFQoV1TqOVRBC0LnWcPo1nsmFO2F8t+UFbsdd0joWALr0ZObsGUiS7j5BLRZRyKmY1pGsnhCCTwK6cCc5ntnHd2odR7FAFlvkpZR8HrwOT0cXRtRtrXUcqxNYoTvvtPmF+JQYvt3yPOdva/t1XkrJ0uD3iIw+Qp9G0/D29NE0T37iW9ybnpXrs/DkXq7EWfc1E4rxWWyR33z5FPtvXGBM/bYUdnTWOo5VqlI8gHfbr8HFoTBTt79GyKW/NcuyOXw+ByPX0K3OaOp7d9QsR371boOOCATfqp7zymMsssinpqfxZch6qhYuwevVA7WOY9VKulfk3XZrqFC0Hov+Gcb6k9+b/cybY1e3sebotzQo15XOtUaYdeyCooxrYQbXacGfF48SessypucUy2CRRX7J6QNExkXzcWAX7GxstY5j9dwcPRnVehmB5Z/nz2OTWBo8nnS9zixjX4s9w+J/RuDtWYu3G05WZ9KY0JDaLSjp7M5nwarnvPIviyvyMckJTDuylZZlqtK6bDWt4+Qb9raO9G08nS61R7Lvwm/M3PkWiamxJh0zPiWG2bv74WDnzJDmC3GwU9NupuRq78i7DTpw+PYV/rx4VOs4ioWwuCI/7cg24nQpfFxAesWbkxCCbnVG83ajqZy9HczELT25E3/FJGOl63Us2DuEe4k3GNJ8AUVcy5hkHOVRvar4UbtIGSYc2khSmnm+rSmWzaKK/PnY2yw5fYDXqgVSw7OU1nHyrcYVezKy1VJik27y7ebnuRh9xOhjrAj7nIhb+3kj8FsqFfMz+vqVJ7MRNnwS2IWrCfdYdDJ/30VMMYxFFfmvQtbjZGfP2PrttI6S71Uv2Zh326/B0c6ZKdteJuzKBqOte9fZZew6u5R2NQbSuGJPo61XMUyT0pXpUM6H74/t4FZinNZxFI1ZTJHfe+0cW66EM7xua4o5u2kdp0AoVagK77X/E29PHxbsHcLm8Pl5PmAXcXM/y0M
/pXaZ1vSo956Rkio59aF/Z1L16Uw6vFnrKIrGLKLIp+v1fB68Fm83T/r5NNU6ToHi7lSUd1r/ip93Z34/8g2/HPqQdH3u7h96O/4y8/cOpqR7Bfo3nomNOjNKM5UKF+Ptmo1ZfuYQp+5e0zqOoiGLKPIrzh0iPOYGH/p3wsnOXus4BY6DnRP9m35PR5+h7D73f8ze3ZckXc6+5ifp4pizux8SSVCLxTg7FDJRWsVQI+u1obCjM18Er1enVBZgmhf5eF0Kk8I2E1CiPF0q1NE6ToFlI2x4od54egd+S/iNvUza0ou7CYbtAer16fywfxQ37p9nYNM5lHCvYNqwikE8HF0Y7fsce6+fY1vUaa3jKBrRvMjPPraT20nxfBLYVZ0yaQGaVX6V4S1/JjrxKt9u6c7lu8ezfc+fxyZz7OpWXvL7hJqlmpkhpWKo3jUaUblwcb4MWY9On651HEUDmhb5qPgYFpzcwwuVfKlf3FvLKMpDfEo35912q7EV9kza+iJHr2596rIHI9ewMXwOzSu/Rquqb5kxpWIIextbPg7ozPnY2yw9fUDrOIoGNC3yE0I3AvB+A9WwytKUKVyN99r/QenCVZm7uz/bI378zzIXo4+w5OC7VC3ekFcafK6+iVmo57xq0LxMFaYe2ca9lESt4yhmZlCRF0J0FEJECCHOCSH+c16cEMJRCLEi8/WDQogKhqz3zwtHGVy7BWXcPHKWWjGLws4lGPvcb9Tzas+KsM9YHvop+oe+8s/dM4DCzsUZ3Fzd/MOSZfSc78r91CSmH9mmdRzFzLIt8kIIW2A20AnwAV4VQjzeDLwfECOlrAJMA74zZPASzu4E1WmZs8SKWTnYOTOo6Vza1RjAjjM/MWfPAOKSowFI1iUwtMVi3ByLaJxSyU7NIqV4pWoAP4Xv1zqKYmZ2BiwTCJyTUl4AEEIsB7oDpx5apjvwWea/VwHfCyGEzOa8rfF+7XG1d8xxaMW8bGxs6VX/I4q7lefX0E/48O+Mg6t9G0+nrEcNjdMphhrn144/LxwBYO7xXdqGUczGkCJfFni4i1UU0PBpy0gp04QQsUBR4M7DCwkhBgIDsx6XuBrPzms7c55a0Yg3LTxGsf/eAgDunXNg57md2kZScqRX4cp8A3x9yHhtLBTLJrK7SEII0QvoKKXsn/m4N9BQSjnsoWVOZC4Tlfn4fOYyd560zsxlstvRVyyUlBIbGxt1gY2VEkKQkJqidQwll1wdHEOllP6GLm/InvxV4OHzG70yn3vSMlFCCDugMBBtaAjFuqizaKyfi706UF5QGHJ2TQhQVQhRUQjhALwC/PXYMn8BWSdJ9wK2q910RVEU7WW7J585xz4M2ATYAj9IKU8KIb4ADkkp/wIWA0uFEOeAu2R8ECiKoigay3ZO3mQDqzl5qyaEUHPyVkptO+smhMjRnLzmvWsURVEU01FFXlEUJR9TRV5RFCUfU0VeURQlHzPkPHlTiRdCRGg4vpI3xYQQT73YTbFoattZt+o5WVjLIh+RkyPEimURQhxS2886qW1n3YQQh3KyvJquURRFycdUkVcURcnHtCzyCzQcW8k7tf2sl9p21i1H20+zK14VRVEU01PTNYqiKPmYKvKKoij5mCryiqIo+Zgq8oqiKPmYWS+GEkJUA8YB5R8eW0rZxpw5lNwTQjQBKvDo9luiWSAlR9T2K3jMfcXrSmAesBBIN/PYSh4JIZYClYEj/Lv9JKCKhBVQ2896CSF6AN8BJQCR+UdKKQtl+15znkKZ2ey+gdkGVIxKCBEO+Ki7vVgntf2sV+Zd9/4npQzP6XvNMicvhCgihCgC/C2ECBJClM56LvN5xTqcAEppHULJNbX9rNfN3BR4MNOevBDiIhlfC8UTXpZSykomD6HkmhDibzK2nzvgCwQDKVmvSym7aZNMMYTaftYrc5oGoCUZH9B/8Oi2W53tOsw8XeMkpUzO7jnFsgghWj7rdSnlLnNlUXJObT/rJYT48RkvSyl
l32zXYeYiHyal9MvuOcUyCSG+k1K+m91zimVS2896CSGaSin3Zffck5hrTr6UEKIB4CyEqC+E8Mv80wpwMUcGxSjaPeG5TmZPoeSW2n7Wa5aBz/2HuU6h7AC8DXgBUx96Pg74wEwZlFwSQgwBgoBKQohjD73kDmS7J6FoS20/6yWEaAw0AYoLIUY/9FIhwNagdZh5uqanlPJ3sw2oGIUQojDgCUwA3nvopTgp5V1tUimGUtvPemUeT2kFDCbjGqMsccDfUsqz2a7DzEV+9BOejgVCpZRHzBZEyZHsTnNVhcI6PGU7xkkpdWYPo+SIEKK8lPJSrt5r5iL/C+AP/J35VFfgGBmXWa+UUk40WxjFYI+dAlsOiMn8twdwWUpZUbt0iqGEEJGAN49uvxvATWCAlDJUs3DKEz10+usTGXL6q7kblHkBflLKMVLKMUADMi7TbUHGnL1igaSUFTOvZdhKxlV3xaSURcn4kN6sbTolB7YAnR/afp2AtWTM18/RNJnyNJOBKcBFIImMljALgXjgvCErMPee/GmgTtbXQyGEI3BUSllDCHFYSlnfbGGUHBNCHJdS1snuOcUyPWX7HZNS1hVCHJFS+moUTcmGEOKQlNI/u+eexNwNyv4POCiE+DPz8f+AX4QQrsApM2dRcu6aEOIjYFnm49eBaxrmUXLmuhDiXWB55uOXgZtCCFtAr10sxQCuQohKUsoLAEKIioCrIW80+z1ehRABZJwSBLBPSnnIrAGUXMs8cPcpGdNrALuBz9WBV+sghChGxvZrlvnUPuBzMk5+KCelPKdVNuXZhBAdybiB9wUyjqeUBwZJKTdl+14NirwtUJJH+1lfNmsIRVEUK5M5vV0j8+FpKWXKs5Z/8D4zz8kPJ2NP4iYZ/ayzeiLXNVsIJceEENOllKOedqRfNbiyDpk37RnLf28aom7aY6GEEG2klNsfalT2CEMalJl7Tn4kUF1KGW3mcZW8WZr592RNUyh5lXXTnkWom/ZYi5bAdjKOXz5OAhbXhXIH0E5KmWa2QRWjEUI8B/wjpUzSOouSc+qmPQWTuYv8YqA6sI5HeyJPfeqbFIshhPgZaAzcBfaQceB1r5QyRtNgikGEEJ8Bt4A1PPr7pw6cWzghxHngABm/d3uklCcNfq+Zi/ynT3peSvm52UIoeSaEKAP0ImN+t4yU0tzTfkouZF65/Dh10x4rkHnQtSHQHGhKxs7yMSnlC9m916y/nFnFXAjhIqVMNOfYSt4JId4g44esDnAH+J6MPQvFCqj2E1YtHdBl/q0n4xvZLUPeaO49+cbAYsBNSllOCFGPjHM9g8wWQsk1IcQdMi6lngfskFJGaptIyQkhhAswmoxz4gcKIaqScSLEWo2jKdkQQiQCx8lo1b41JyevmLvIHyTja/5fWS0MhBAnpJS1zRZCyRMhRC0yLoZqBlQFIqSUvbVNpRhCCLECCAXelFLWziz6/6h2BpZPCNGdjN+5QCAV+AfYLaXclt17zd2gDCnllceeUqdyWQkhRCEyulCWJ+Nc68Koy+GtSeXMTq86gMwpU6FtJMUQUso/pZTjgEHAejIaOhr0DczcB8yuCCGaAFIIYU/GefPhZs6g5N7eh/58L6WM0jiPkjOpQghnMi9oE0JU5qGzbBTLJYT4HahHxnTpbuBN4KBB7zXzdE0xYAbQlow9iM3ASHVxVP4ghJglpRyudQ7lyYQQ7YCPAB8yfveaAm9LKXdqmUvJnhDCHzgspXzizIcQop2UcssTXzN37xol/xJChEkp/bTOoTydEKIo0IiMnawDUso7GkdSjOBZv3tmma4RQszi2Xc3GWGOHIpSEAkhHv/lv575dzkhRDkpZZi5MylG99RjK+aak1fthBVFO1Oe8ZoEVIMy6/fUnWizFHkp5c+GLKfmdK2eOlPDAkkpWxuy3LPmdRXrZfZTKLPRVOsASp7M0DqAkiffaR1AybXIp72geo4o2TL0jvFSyp/MlUkxCfVNzMI8rY9
8lqx+8lLKpy6nirxiCNVHvmBQp9pZnif1kc9iUD95Syvyak/CAkkpd2mdQVEKIilln7yuw9KKvJrTtWCZDa0mkHExjVPW86pVbb4RqXUA5emEEF2AWjz6u/dFdu8z13nyak43f/iRjHv0TgNaA32wvIP3ymOMMa+raEsIMQ9wIeP3bhEZjR6DDXqvOa54FUK0fNbrajrAOmTdPk4IcVxKWefh57TOpjydEOLHZ7wspZR9zRZGyRUhxDEpZd2H/nYDNkgpm2f3XnOdJ6+KeP6QIoSwAc4KIYYBVwE3jTMp2TDGvK6iuaz7Kidm3pktGihtyBvNOiev5nSt3kgyvjKOAL4k40rJtzRNpORIbud1Fc2tFUJ4AJOAMDKmvxcZ8kZzd6Hcy79zuv8jc05XSvmJ2UIoeZbZV15KKeO0zqIY7mnzulLKfpoGU7IlhHCUUqZk/ZuMD+nkrOeexdwHzZwz72QipJSXpJSfAV3MnEHJJSGEvxDiOHAMOC6EOCqEUPPx1qOJlPJNICbzfsuNgWoaZ1IMsz/rH1LKFCll7MPPPYu5T6FUc7rW7QcgSEq5B0AI0YyMM27qappKMVSu53UVbQghSgFlAWchRH3+vZaoEBnfyrJl7iKv5nStW3pWgQeQUu4VQqRpGUjJkVzP6yqa6UDGrf68yLiJd5b7wAeGrECTm4aoOV3rJISYDjgDv5JRIF4GkoFlAKovuWXLy7yuoi0hRE8p5e+5eq+ZD7z6k/H13j3zqVigr5Qy1GwhlFwTQux4xstSSqn6kluwJ909SN3NyzpkTtt8DZSRUnYSQvgAjaWUi7N7r7mna9ScrhUztC+5YlmMMa+raO7HzD8fZj4+A6wALK7IqzldKyaEKAl8Qy72JhRN5XleV9FcMSnlb0KI9wGklGlCiCfe1Ptx5i7yu4QQ83l0Tndn1j0o1ZyuxfuJXO5NKNrJvDPbz3mZ11U0l5B5E3YJIIRoRMZ0d7bMPSev5nStmBAiREoZIIQ4LKWsn/ncESmlr8bRFAPkZV5X0VbmjvAsMq5WPgkUB3pJKY9l916z7smrOV2rl+u9CcUi5HpeV9HcKWANkAjEAX+Qsf2yZdYrXoUQJYUQi4UQGzIf+wgh1CXV1mM08BdQWQixD1gCqBuvW49iUsrfAD1kzOsCBs3rKppbAtQg45jYLDKuVF5qyBvNPSf/E2pPwppVBjoB3kBPoCGWd+MZ5enUNzHrVVtK6fPQ4x1CiFOGvNHcvWvUnoR1+1hKeR/wJKPJ1RxgrraRlBzI+iZWSX0TszphmR/KAAghGgKHDHmjuffC1J6Edcv6QO4CLJRSrhNCfKVlICVHcj2vq2iuAfCPEOJy5uNyQERmw0AppXzqtUbmPrsm6whxbeAEOThCrGhPCLGWjKZy7QA/MhpeBUsp62kaTDGIEOI3Ms6N/7/Mp14DPKSUL2qXSjGEEKL8s16XUl562mvm3pNXc7rW7SWgIzBZSnlPCFEaGKdxJsVwuZ7XVbT1rCKeHXPPyas5XSsmpUyUUq6WUp7NfHxdSrlZ61yKwXI9r6tYL3MX+f/M6QIOZs6gKAVV1rxupBAikoybTgQIIY4LIdSUaT5l7qmSq5ltDdoB32W2OzX3B42iFFQdtQ6gmJ+5D7y6kPGDdlxKeTZzTreO+sqvKIpiGprcNERRFEUxDzVVoiiKko+pIq8oipKPqSKvKIqSj6kiryiKko+pIq8oipKP/T/ALWTCwbu6XwAAAABJRU5ErkJggg==\n",
2433 | "text/plain": [
2434 | ""
2435 | ]
2436 | },
2437 | "metadata": {
2438 | "needs_background": "light"
2439 | },
2440 | "output_type": "display_data"
2441 | }
2442 | ],
2443 | "source": [
2444 | "x_grp, x_grp_norm=coord_plot(iris, 'cluster')"
2445 | ]
2446 | },
2447 | {
2448 | "cell_type": "code",
2449 | "execution_count": 46,
2450 | "metadata": {},
2451 | "outputs": [
2452 | {
2453 | "data": {
2454 | "text/html": [
2455 | "\n",
2456 | "\n",
2469 | "
\n",
2470 | " \n",
2471 | " \n",
2472 | " \n",
2473 | " sepal_length \n",
2474 | " sepal_width \n",
2475 | " petal_length \n",
2476 | " petal_width \n",
2477 | " cluster \n",
2478 | " \n",
2479 | " \n",
2480 | " \n",
2481 | " \n",
2482 | " 0 \n",
2483 | " 5.006000 \n",
2484 | " 3.428000 \n",
2485 | " 1.462000 \n",
2486 | " 0.246000 \n",
2487 | " 0 \n",
2488 | " \n",
2489 | " \n",
2490 | " 1 \n",
2491 | " 5.901613 \n",
2492 | " 2.748387 \n",
2493 | " 4.393548 \n",
2494 | " 1.433871 \n",
2495 | " 1 \n",
2496 | " \n",
2497 | " \n",
2498 | " 2 \n",
2499 | " 6.850000 \n",
2500 | " 3.073684 \n",
2501 | " 5.742105 \n",
2502 | " 2.071053 \n",
2503 | " 2 \n",
2504 | " \n",
2505 | " \n",
2506 | "
\n",
2507 | "
"
2508 | ],
2509 | "text/plain": [
2510 | " sepal_length sepal_width petal_length petal_width cluster\n",
2511 | "0 5.006000 3.428000 1.462000 0.246000 0\n",
2512 | "1 5.901613 2.748387 4.393548 1.433871 1\n",
2513 | "2 6.850000 3.073684 5.742105 2.071053 2"
2514 | ]
2515 | },
2516 | "execution_count": 46,
2517 | "metadata": {},
2518 | "output_type": "execute_result"
2519 | }
2520 | ],
2521 | "source": [
2522 | "x_grp"
2523 | ]
2524 | },
2525 | {
2526 | "cell_type": "code",
2527 | "execution_count": 47,
2528 | "metadata": {},
2529 | "outputs": [
2530 | {
2531 | "data": {
2532 | "text/html": [
2533 | "\n",
2534 | "\n",
2547 | "
\n",
2548 | " \n",
2549 | " \n",
2550 | " \n",
2551 | " sepal_length \n",
2552 | " sepal_width \n",
2553 | " petal_length \n",
2554 | " petal_width \n",
2555 | " cluster \n",
2556 | " \n",
2557 | " \n",
2558 | " \n",
2559 | " \n",
2560 | " 0 \n",
2561 | " 0.00000 \n",
2562 | " 1.000000 \n",
2563 | " 0.000000 \n",
2564 | " 0.000000 \n",
2565 | " 0 \n",
2566 | " \n",
2567 | " \n",
2568 | " 1 \n",
2569 | " 0.48569 \n",
2570 | " 0.000000 \n",
2571 | " 0.684924 \n",
2572 | " 0.650869 \n",
2573 | " 1 \n",
2574 | " \n",
2575 | " \n",
2576 | " 2 \n",
2577 | " 1.00000 \n",
2578 | " 0.478651 \n",
2579 | " 1.000000 \n",
2580 | " 1.000000 \n",
2581 | " 2 \n",
2582 | " \n",
2583 | " \n",
2584 | "
\n",
2585 | "
"
2586 | ],
2587 | "text/plain": [
2588 | " sepal_length sepal_width petal_length petal_width cluster\n",
2589 | "0 0.00000 1.000000 0.000000 0.000000 0\n",
2590 | "1 0.48569 0.000000 0.684924 0.650869 1\n",
2591 | "2 1.00000 0.478651 1.000000 1.000000 2"
2592 | ]
2593 | },
2594 | "execution_count": 47,
2595 | "metadata": {},
2596 | "output_type": "execute_result"
2597 | }
2598 | ],
2599 | "source": [
2600 | "x_grp_norm"
2601 | ]
2602 | },
2603 | {
2604 | "cell_type": "code",
2605 | "execution_count": null,
2606 | "metadata": {},
2607 | "outputs": [],
2608 | "source": []
2609 | }
2610 | ],
2611 | "metadata": {
2612 | "kernelspec": {
2613 | "display_name": "Python 3",
2614 | "language": "python",
2615 | "name": "python3"
2616 | },
2617 | "language_info": {
2618 | "codemirror_mode": {
2619 | "name": "ipython",
2620 | "version": 3
2621 | },
2622 | "file_extension": ".py",
2623 | "mimetype": "text/x-python",
2624 | "name": "python",
2625 | "nbconvert_exporter": "python",
2626 | "pygments_lexer": "ipython3",
2627 | "version": "3.7.5"
2628 | }
2629 | },
2630 | "nbformat": 4,
2631 | "nbformat_minor": 4
2632 | }
2633 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.poetry]
2 | name = "funpymodeling"
3 | version = "0.1.7"
4 | description = "A package designed for data scientists and teachers, to speed up their ML projects, focused on exploratory data analysis, data preparation, and model performance."
5 | license = "MIT"
6 | authors = ["Pablo Casas "]
7 | readme = "README.md"
8 | repository = "https://github.com/pablo14/funPyModeling"
9 | documentation = "https://github.com/pablo14/funPyModeling"
10 |
11 | # Runtime requirements: published with the package and installed for every consumer.
12 | [tool.poetry.dependencies]
13 | python = ">=3.8.1,<4.0"
14 | pandas = "^2.0.2"
15 | numpy = "^1.24.3"
16 | matplotlib = "^3.7.1"
17 | typing-extensions = "^4.6.3"
18 | scikit-learn = "^1.2.2"
19 | seaborn = "^0.12.2"
20 |
21 | # Development-only tooling (linting, notebooks, git hooks, tests). `poetry install`
22 | # installs this group by default, but it is not published as a requirement of the
23 | # package. Regenerate poetry.lock whenever a dependency table changes.
24 | [tool.poetry.group.dev.dependencies]
25 | flake8 = "^6.0.0"
26 | jupyter = "^1.0.0"
27 | pre-commit = "^3.3.2"
28 | pytest = "^7.3.1"
29 |
30 | [build-system]
31 | requires = ["poetry-core"]
32 | build-backend = "poetry.core.masonry.api"
33 |
--------------------------------------------------------------------------------