├── .gitignore ├── LICENSE ├── README.md └── python_in_powerbi ├── Pipfile ├── README.md ├── boston_housing.ipynb ├── boston_housing.pbit ├── boston_housing_dataset.py ├── heatmap_pc1.py ├── heatmap_pc2.py └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | # VS Code 107 | .vscode 108 | 109 | # Mac OS 110 | .DS_Store -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Tommi Ranta 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # data-science-blog 2 | Collection of code/examples used in my articles on Medium and LinkedIn 3 | 4 | ## Python in Power BI 5 | This article showcases how to use Python scripts in Power BI. It covers the following topics: 6 | * Set up a Python virtual environment and required libraries for Power BI 7 | * Enable and configure Python scripting in Power BI 8 | * Import data in Power BI using a Python script 9 | * Add Python visuals to a Power BI report 10 | 11 | https://github.com/tompp4/data-science-blog/tree/master/python_in_powerbi 12 | 13 | -------------------------------------------------------------------------------- /python_in_powerbi/Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | numpy = "*" 8 | pandas = "*" 9 | matplotlib = "*" 10 | seaborn = "*" 11 | scikit-learn = "*" 12 | jupyterlab = "*" 13 | 14 | [dev-packages] 15 | "pep8" = "*" 16 | 17 | [requires] 18 | python_version = "3" 19 | -------------------------------------------------------------------------------- /python_in_powerbi/README.md: -------------------------------------------------------------------------------- 1 | # Python in Power BI - Best of two worlds? 2 | Links to the blog post: [Medium](https://www.medium.com) & [LinkedIn](https://www.linkedin.com) 3 | 4 | This blog post focuses on showcasing how to use Python in Power BI. We use the [Boston Housing dataset](http://lib.stat.cmu.edu/datasets/boston), which is available from [scikit-learn](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_boston.html). In this example the dataset is used for clustering, although it is usually used for regression tasks. First we use PCA to reduce the data to two dimensions, which allows us to visualize it in a 2-dimensional plot. Afterwards we apply K-Means clustering to identify homogeneous groups in the data. Finally, this data is used to create visualizations in Microsoft Power BI Desktop. 5 | 6 | ## Prerequisites 7 | This example assumes that Power BI Desktop ([download for free](https://powerbi.microsoft.com/en-us/downloads/)) and Python + [pipenv](https://docs.pipenv.org) are installed on the system. Feel free to use another Python installation instead; you can check the required libraries in the `Pipfile`. 8 | 9 | ## Setting up Python in Power BI 10 | * Clone or download the repository 11 | * Open Command Prompt and navigate to the `python_in_powerbi` folder 12 | * Run `pipenv install` to install all required packages (add the `--dev` option to include the pep8 linter) 13 | * Run `pipenv --venv` and take note of the location of the virtual environment 14 | * Open Power BI Desktop and go to `Options & Settings` > `Options` 15 | * Navigate to `Python scripting` and change `Detected Python home directories` to `Other` 16 | * `Browse` to the `Scripts` folder found in the newly created virtual environment 17 | * Click OK and you're ready for Python in Power BI! 18 | 19 | ### Use venv as the Python environment (optional) 20 | You can use Python's built-in venv module instead of pipenv to manage the Python environment for Power BI. Just replace the pipenv steps above with the following commands.
21 | * Open the Command Prompt and navigate to the `python_in_powerbi` folder 22 | * `python -m venv venv` to create a virtual environment 23 | * `venv\Scripts\activate.bat` to activate the virtual environment 24 | * `pip install -r requirements.txt` to install the Python libraries (run with the virtual environment activated) 25 | * The path to your Python environment is the `venv\Scripts` folder inside `python_in_powerbi` 26 | 27 | ## Using Python in Power BI 28 | Read my blog post for a step-by-step walkthrough of how to use Python in Power BI. 29 | 30 | ## File descriptions 31 | * `boston_housing.pbit` - Ready-to-use Power BI template. You only need to make sure your Python environment is configured in Power BI 32 | * `boston_housing.ipynb` - Jupyter notebook, which contains the exploratory data analysis and modeling work done to create the scripts. 33 | * `boston_housing_dataset.py` - Script that is used to load the dataset in Power BI 34 | * `heatmap_pc1.py` - Script that is used to display a heatmap of principal component 1 35 | * `heatmap_pc2.py` - Script that is used to display a heatmap of principal component 2 36 | 37 | ## Some helpful links 38 | * Homepage of Power BI Desktop where you can download the software - https://powerbi.microsoft.com/en-us/desktop 39 | * Blog post showcasing various Python visualizations - https://powerbi.microsoft.com/en-us/blog/pythonblogepisode1 40 | * Official Power BI documentation for using Python scripts - https://docs.microsoft.com/en-gb/power-bi/desktop-python-scripts 41 | * Power BI requires ChromeDriver for visualizations whose original output is in HTML format - http://chromedriver.chromium.org/downloads 42 | -------------------------------------------------------------------------------- /python_in_powerbi/boston_housing.pbit: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tommiranta/data-science-blog/f968a7885d994d737130e97e4433802b82811ecb/python_in_powerbi/boston_housing.pbit -------------------------------------------------------------------------------- /python_in_powerbi/boston_housing_dataset.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from sklearn.datasets import load_boston 4 | from sklearn.preprocessing import StandardScaler 5 | from sklearn.decomposition import PCA 6 | from sklearn.cluster import KMeans 7 | 8 | # utilize the sklearn.datasets package to load the Boston Housing dataset 9 | boston = load_boston() 10 | 11 | # scale the data to the same value range first since PCA 12 | # is sensitive to the scaling of the data 13 | sc = StandardScaler() 14 | X = sc.fit_transform(boston.data) 15 | 16 | # create PCA with n_components=2 to allow visualization in 2 dimensions 17 | pca = PCA(n_components=2) 18 | X_pca = pca.fit_transform(X) 19 | 20 | # divide data into 5 clusters (refer to .ipynb for motivation) 21 | kmeans = KMeans(n_clusters=5, init='k-means++', max_iter=300, n_init=10) 22 | y_kmeans = kmeans.fit_predict(X_pca) 23 | 24 | # create pandas dataframe of the housing data for Power BI 25 | columns = np.append(boston.feature_names, ['MEDV', 'PC1', 'PC2', 'CLUSTER']) 26 | data = np.concatenate((boston.data, 27 | boston.target.reshape(-1, 1), 28 | X_pca, 29 | y_kmeans.reshape(-1, 1)), 30 | axis=1) 31 | df_housing = pd.DataFrame(data=data, columns=columns) 32 | # we need to convert all columns to strings because of the different 33 | # decimal separator in Python (.) and the Finnish locale (,) that Power BI uses.
34 | # comment out below line if Power BI uses dot as a decimal separator. 35 | df_housing = df_housing.astype('str') 36 | 37 | # create pandas dataframe of the pca data for Power BI 38 | columns = np.append(boston.feature_names, ['VARRATIO']) 39 | data = np.concatenate((pca.components_, 40 | pca.explained_variance_ratio_.reshape(-1, 1)), 41 | axis=1) 42 | df_pca = pd.DataFrame(data=data, columns=columns, index=['PC1', 'PC2']) 43 | df_pca = df_pca.astype('str') 44 | 45 | # debug prints 46 | # uncomment below lines and run script to test that everything works. 47 | # print(df_housing.sample(5)) 48 | # print(df_pca) 49 | -------------------------------------------------------------------------------- /python_in_powerbi/heatmap_pc1.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import seaborn as sns 3 | 4 | dataset.index = ['PC1', 'PC2'] 5 | plt.figure(figsize=(8, 2)) 6 | plt.xticks(rotation=45) 7 | data = dataset.loc['PC1', :].to_frame().sort_values(by='PC1').transpose() 8 | sns.heatmap(data, 9 | cmap='plasma', 10 | square=True, 11 | annot=True, 12 | cbar=False, 13 | yticklabels='') 14 | plt.show() 15 | -------------------------------------------------------------------------------- /python_in_powerbi/heatmap_pc2.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import seaborn as sns 3 | 4 | dataset.index = ['PC1', 'PC2'] 5 | plt.figure(figsize=(2, 8)) 6 | data = dataset.loc['PC2', :].to_frame().sort_values(by='PC2', ascending=False) 7 | sns.heatmap(data, 8 | cmap='plasma', 9 | square=True, 10 | annot=True, 11 | cbar=False, 12 | xticklabels='') 13 | plt.show() 14 | -------------------------------------------------------------------------------- /python_in_powerbi/requirements.txt: -------------------------------------------------------------------------------- 1 | anyio==3.3.0 2 | appnope==0.1.2 3 | argon2-cffi==20.1.0 4 | attrs==21.2.0 5 | Babel==2.9.1 6 | backcall==0.2.0 7 | bleach==4.0.0 8 | certifi==2024.7.4 9 | cffi==1.14.6 10 | charset-normalizer==2.0.4 11 | cycler==0.10.0 12 | debugpy==1.4.1 13 | decorator==5.0.9 14 | defusedxml==0.7.1 15 | entrypoints==0.3 16 | idna==3.7 17 | ipykernel==6.2.0 18 | ipython==8.10.0 19 | ipython-genutils==0.2.0 20 | jedi==0.18.0 21 | Jinja2==3.1.6 22 | joblib==1.2.0 23 | json5==0.9.6 24 | jsonschema==3.2.0 25 | jupyter-client==7.0.1 26 | jupyter-core==4.7.1 27 | jupyter-server==2.11.2 28 | jupyterlab==3.6.8 29 | jupyterlab-pygments==0.1.2 30 | jupyterlab-server==2.7.2 31 | kiwisolver==1.3.1 32 | MarkupSafe==2.0.1 33 | matplotlib==3.4.3 34 | matplotlib-inline==0.1.2 35 | mistune==2.0.3 36 | nbclassic==0.3.1 37 | nbclient==0.5.4 38 | nbconvert==6.5.1 39 | nbformat==5.1.3 40 | nest-asyncio==1.5.1 41 | notebook==6.4.12 42 | numpy==1.22.0 43 | packaging==21.0 44 | pandas==1.3.2 45 | pandocfilters==1.4.3 46 | parso==0.8.2 47 | pexpect==4.8.0 48 | pickleshare==0.7.5 49 | Pillow==10.3.0 50 | prometheus-client==0.11.0 51 | prompt-toolkit==3.0.20 52 | ptyprocess==0.7.0 53 | pycparser==2.20 54 | Pygments==2.15.0 55 | pyparsing==2.4.7 56 | pyrsistent==0.18.0 57 | python-dateutil==2.8.2 58 | pytz==2021.1 59 | pyzmq==22.2.1 60 | requests==2.32.0 61 | requests-unixsocket==0.2.0 62 | scikit-learn==1.5.0 63 | scipy==1.10.0 64 | seaborn==0.11.2 65 | Send2Trash==1.8.0 66 | six==1.16.0 67 | sniffio==1.2.0 68 | terminado==0.11.1 69 | testpath==0.5.0 70 | threadpoolctl==2.2.0 71 | tornado==6.5.1 72 | traitlets==5.0.5 73 | 
urllib3==1.26.19 74 | wcwidth==0.2.5 75 | webencodings==0.5.1 76 | websocket-client==1.2.1 77 | --------------------------------------------------------------------------------